diff --git a/.typos.toml b/.typos.toml index 70eeb7f618..7776161986 100644 --- a/.typos.toml +++ b/.typos.toml @@ -24,5 +24,8 @@ USCALED = "USCALED" Datas = "Datas" HSA = "HSA" VALU = "VALU" +dne = "dne" offen = "offen" +varing = "varing" Derivate = "Derivate" +Fo = "Fo" diff --git a/CMakeLists.txt b/CMakeLists.txt index 54f9769ad8..790eb47185 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,8 @@ if(ICD_BUILD_LLPC) add_subdirectory(util ${PROJECT_BINARY_DIR}/util) endif() +add_subdirectory(gfxruntime ${PROJECT_BINARY_DIR}/gfxruntime) + ### VKGC build LLPC ################################################################ if(ICD_BUILD_LLPC) include("cmake/compilerutils.cmake") @@ -171,6 +173,10 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET vfx PROPERTY FOLDER Compiler) endif() + if (TARGET gfxruntime) + set_property(TARGET gfxruntime PROPERTY FOLDER Compiler) + endif() + if(ICD_BUILD_LLPC) set_property(TARGET llpc PROPERTY FOLDER Compiler) set_property(TARGET llpcinternal PROPERTY FOLDER Compiler) @@ -212,8 +218,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") endif() set_property(TARGET check-amdllpc PROPERTY FOLDER "LLPC Tests") set_property(TARGET check-amdllpc-units PROPERTY FOLDER "LLPC Tests") - set_property(TARGET check-continuations PROPERTY FOLDER "Continuations Tests") - set_property(TARGET check-continuations-units PROPERTY FOLDER "Continuations Tests") + set_property(TARGET check-llvmraytracing PROPERTY FOLDER "LLVMRaytracing Tests") + set_property(TARGET check-llvmraytracing-units PROPERTY FOLDER "LLVMRaytracing Tests") set_property(TARGET check-lgc-units PROPERTY FOLDER "LGC Tests") endif() endif() diff --git a/cmake/CompilerFlags.cmake b/cmake/CompilerFlags.cmake index f586f4bdbe..9fec02583e 100644 --- a/cmake/CompilerFlags.cmake +++ b/cmake/CompilerFlags.cmake @@ -68,6 +68,7 @@ function(set_compiler_options PROJECT_NAME ENABLE_WERROR) target_compile_options("${PROJECT_NAME}" PRIVATE # Output with color if in terminal: 
https://github.com/ninja-build/ninja/wiki/FAQ -fcolor-diagnostics + -Werror=unused-private-field -Wno-covered-switch-default -Wno-extra-semi -Wno-gnu-anonymous-struct diff --git a/compilerutils/include/compilerutils/CompilerUtils.h b/compilerutils/include/compilerutils/CompilerUtils.h index b324938d89..3669ebeb2b 100644 --- a/compilerutils/include/compilerutils/CompilerUtils.h +++ b/compilerutils/include/compilerutils/CompilerUtils.h @@ -67,6 +67,9 @@ llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newTy llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newType, llvm::ArrayRef argAttrs, llvm::Module *targetModule = nullptr); +// Add an unreachable at the current position and remove the rest of the basic block. +void createUnreachable(llvm::IRBuilder<> &b); + struct CrossModuleInlinerResult { llvm::Value *returnValue; llvm::iterator_range newBBs; @@ -104,9 +107,37 @@ class CrossModuleInliner { llvm::GlobalValue *findCopiedGlobal(llvm::GlobalValue &sourceGv, llvm::Module &targetModule); private: + // Checks that we haven't processed a different target module earlier. + void checkTargetModule(llvm::Module &targetModule) { + if (lastUsedTargetModule == nullptr) + lastUsedTargetModule = &targetModule; + else + assert(lastUsedTargetModule == &targetModule); + } + llvm::SmallDenseMap mappedGlobals; + llvm::Module *lastUsedTargetModule = nullptr; // used to check that we don't use different target modules }; +// Essentially RAUW for pointers for the case that these use different address +// spaces, rewriting all derived pointers to also use the new address space. +// Writes instructions which are redundant after the replacement into +// the given ToBeRemoved vector. +// The caller has to handle the erasure afterwards. 
+void replaceAllPointerUses(llvm::IRBuilder<> *builder, llvm::Value *oldPointerValue, llvm::Value *newPointerValue, + llvm::SmallVectorImpl &toBeRemoved); + } // namespace CompilerUtils +namespace llvm { + +// Replacement for PointerType::getWithSamePointeeType that works with new LLVM. +// Returns a typed pointer type if the pointer type is typed. +// +// TODO: Remove this as soon as all internal users of opaque pointers have been +// fixed. +PointerType *getWithSamePointeeType(PointerType *ptrTy, unsigned addressSpace); + +} // namespace llvm + #endif diff --git a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h index 60c3c84327..0da24faa5c 100644 --- a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h +++ b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h @@ -71,7 +71,7 @@ template class LoweringPoint using ReverseMap = std::conditional_t>, Empty>; /// If requested, track the locations in which each value is mentioned. - [[no_unique_address]] ReverseMap m_reverseMap; + ReverseMap m_reverseMap; /// Map keys to values. 
/// diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index dd82f770ac..40cc96bd83 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -135,6 +136,20 @@ Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, return cloneFunctionHeader(f, newType, attributes, targetModule); } +void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) { + auto *unreachable = b.CreateUnreachable(); + auto it = ++unreachable->getIterator(); + auto *bb = unreachable->getParent(); + if (it == bb->end()) + return; + + // Remove rest of BB + auto *oldCode = BasicBlock::Create(b.getContext(), "", bb->getParent()); + oldCode->splice(oldCode->end(), bb, it, bb->end()); + oldCode->replaceSuccessorsPhiUsesWith(bb, oldCode); + DeleteDeadBlock(oldCode); +} + namespace { // Get the name of a global that is copied to a different module for inlining. 
@@ -220,6 +235,7 @@ class CrossModuleValueMaterializer : public ValueMaterializer { iterator_range CompilerUtils::CrossModuleInliner::inlineCall(CallBase &cb) { auto *calleeFunc = cb.getCalledFunction(); assert(calleeFunc && "Cannot find called function"); + checkTargetModule(*cb.getFunction()->getParent()); LLVM_DEBUG(dbgs() << "Inlining '" << calleeFunc->getName() << "' across modules\n"); Function *targetFunc = cb.getFunction(); @@ -346,6 +362,7 @@ CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *ca GlobalValue *CompilerUtils::CrossModuleInliner::findCopiedGlobal(GlobalValue &sourceGv, Module &targetModule) { assert(sourceGv.getParent() != &targetModule && "This function only finds copies across modules"); assert(sourceGv.hasName() && "Cannot find a global value that does not have a name"); + checkTargetModule(targetModule); if (auto found = mappedGlobals.find(&sourceGv); found != mappedGlobals.end()) { assert(found->second->getParent() == &targetModule && @@ -358,3 +375,126 @@ GlobalValue *CompilerUtils::CrossModuleInliner::findCopiedGlobal(GlobalValue &so assert(gv->getValueType() == sourceGv.getValueType()); return gv; } + +PointerType *llvm::getWithSamePointeeType(PointerType *ptrTy, unsigned addressSpace) { +#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 482880 + return PointerType::getWithSamePointeeType(ptrTy, addressSpace); +#else + // New version of the code (also handles unknown version, which we treat as + // latest) + return PointerType::get(ptrTy->getContext(), addressSpace); +#endif +} + +void CompilerUtils::replaceAllPointerUses(IRBuilder<> *builder, Value *oldPointerValue, Value *newPointerValue, + SmallVectorImpl &toBeRemoved) { + // Note: The implementation explicitly supports typed pointers, which + // complicates some of the code below. + + // Assert that both types are pointers that only differ in the address space. 
+ PointerType *oldPtrTy = cast(oldPointerValue->getType()); + (void)oldPtrTy; + PointerType *newPtrTy = cast(newPointerValue->getType()); + unsigned newAS = newPtrTy->getAddressSpace(); + assert(newAS != oldPtrTy->getAddressSpace()); + assert(getWithSamePointeeType(oldPtrTy, newAS) == newPtrTy); + + oldPointerValue->mutateType(newPtrTy); + + // Traverse through the users and setup the addrspace + SmallVector worklist(make_pointer_range(oldPointerValue->uses())); + oldPointerValue->replaceAllUsesWith(newPointerValue); + + // Given a pointer type, get a pointer with the same pointee type (possibly + // opaque) as the given type that uses the newAS address space. + auto getMutatedPtrTy = [newAS](Type *ty) { + PointerType *ptrTy = cast(ty); + // Support typed pointers: + return getWithSamePointeeType(ptrTy, newAS); + }; + + while (!worklist.empty()) { + Use *ptrUse = worklist.pop_back_val(); + Value *ptr = cast(ptrUse); + Instruction *inst = cast(ptrUse->getUser()); + LLVM_DEBUG(dbgs() << "Visiting " << *inst << '\n'); + // In the switch below, "break" means to continue with replacing + // the users of the current value, while "continue" means to stop at + // the current value, and proceed with next one from the work list. + auto usesRange = make_pointer_range(inst->uses()); + switch (inst->getOpcode()) { + default: + LLVM_DEBUG(inst->dump()); + llvm_unreachable("Unhandled instruction\n"); + break; + case Instruction::Call: { + if (inst->isLifetimeStartOrEnd()) { + // The lifetime marker is not useful anymore. + inst->eraseFromParent(); + } else { + LLVM_DEBUG(inst->dump()); + llvm_unreachable("Unhandled call instruction\n"); + } + // No further processing needed for the users. + continue; + } + case Instruction::Load: + case Instruction::Store: + // No further processing needed for the users. + continue; + case Instruction::InsertValue: + // For insertvalue, there could be 2 cases: + // Assume %ptr = ptrtoint ... 
to i32 + // (1) %inserted = insertvalue [2 x i32] poison, i32 %ptr, 0 + // (2) %0 = bitcast i32 %ptr to [2 x i16] + // %inserted = insertvalue [2 x i16] %0, i16 1, 0 + // For (1), no further handling is needed; for (2), we are modifying the + // pointer and need to track all users of %inserted. + if (cast(inst)->getAggregateOperand() == ptr) { + break; + } + continue; + case Instruction::And: + case Instruction::Add: + case Instruction::PtrToInt: + break; + case Instruction::BitCast: { + // This can happen with typed pointers + assert(cast(inst)->getSrcTy()->isPointerTy() && + cast(inst)->getDestTy()->isPointerTy()); + inst->mutateType(getMutatedPtrTy(inst->getType())); + break; + } + case Instruction::AddrSpaceCast: + // Check that the pointer operand has already been fixed + assert(inst->getOperand(0)->getType()->getPointerAddressSpace() == newAS); + // Push the correct users before RAUW. + worklist.append(usesRange.begin(), usesRange.end()); + inst->mutateType(getMutatedPtrTy(inst->getType())); + // Since we are mutating the address spaces of users as well, + // we can just use the (already mutated) cast operand. 
+ inst->replaceAllUsesWith(inst->getOperand(0)); + toBeRemoved.push_back(inst); + continue; + case Instruction::IntToPtr: + case Instruction::GetElementPtr: { + inst->mutateType(getMutatedPtrTy(inst->getType())); + break; + } + case Instruction::Select: { + auto *oldType = inst->getType(); + if (oldType->isPointerTy()) { + Type *newType = getMutatedPtrTy(oldType); + // No further processing if the type has the correct pointer type + if (newType == oldType) + continue; + + inst->mutateType(newType); + } + break; + } + } + + worklist.append(usesRange.begin(), usesRange.end()); + } +} diff --git a/gfxruntime/CMakeLists.txt b/gfxruntime/CMakeLists.txt new file mode 100644 index 0000000000..d18bce873c --- /dev/null +++ b/gfxruntime/CMakeLists.txt @@ -0,0 +1,87 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +add_library(gfxruntime) + +# Locate python binary +# No particular version of python3 is necessary +find_package(Python3 + # Disable information messages + QUIET + # Python3 is required to run the shader compile script + REQUIRED + # Only the interpreter is required, we don't need the Compiler, Development, or NumPy + COMPONENTS Interpreter +) + +# Locate dxc binary. +if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux") + find_program(DXC_PATH dxc) + if ("${DXC_PATH}" STREQUAL "DXC_PATH-NOTFOUND") + message(FATAL_ERROR "Could not find shader compiler tool dxc.") + endif() +#if _WIN32 +elseif(WIN32) + set(DXC_PATH "$ENV{DK_ROOT}/DirectXShaderCompiler/8c9d92b/bin") + if (NOT EXISTS "${DXC_PATH}") + message(FATAL_ERROR "Unable to find DirectXShaderCompiler directory: ${DXC_PATH}") + endif() +#endif +endif() + +set(CMAKE_CURRENT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/gfxruntime) +set(CMAKE_CURRENT_BINARY_DIR ${PROJECT_BINARY_DIR}/gfxruntime) + +### Auto-generated advanced blend runtime library for graphics shaders ################################################# +set(GEN_ADVANCED_BLEND_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/tools/CompileAdvancedBlendShader.py") +set(ADVANCED_BLEND_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/shaders/AdvancedBlend.hlsl") +set(ADVANCED_BLEND_OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/g_AdvancedBlendLibrary_spv.h") +add_custom_command( + OUTPUT + ${ADVANCED_BLEND_OUT_FILE} + DEPENDS + ${ADVANCED_BLEND_INPUT_FILE} + ${GEN_ADVANCED_BLEND_SCRIPT} + COMMAND Python3::Interpreter "${GEN_ADVANCED_BLEND_SCRIPT}" + --compilerPath "${DXC_PATH}" + --shaderDir 
"${CMAKE_CURRENT_SOURCE_DIR}/src/shaders" + --outputDir "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating advanced blend runtime library" +) + +target_sources(gfxruntime + PRIVATE + ${ADVANCED_BLEND_OUT_FILE} + ${CMAKE_CURRENT_SOURCE_DIR}/src/GfxRuntimeLibrary.cpp +) +target_include_directories(gfxruntime + PUBLIC + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_compile_options(gfxruntime PRIVATE -fPIC) +endif() diff --git a/gfxruntime/include/gfxruntime/GfxRuntimeLibrary.h b/gfxruntime/include/gfxruntime/GfxRuntimeLibrary.h new file mode 100644 index 0000000000..cd06da9149 --- /dev/null +++ b/gfxruntime/include/gfxruntime/GfxRuntimeLibrary.h @@ -0,0 +1,38 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GfxRuntimeLibrary.h + * @brief VKGC header file: declares the function that returns the advanced blend runtime library + *********************************************************************************************************************** + */ +#pragma once +#include +#include +namespace Vkgc { + +std::pair GetAdvancedBlendLibrary(); + +} // namespace Vkgc diff --git a/gfxruntime/src/GfxRuntimeLibrary.cpp b/gfxruntime/src/GfxRuntimeLibrary.cpp new file mode 100644 index 0000000000..3230f9176a --- /dev/null +++ b/gfxruntime/src/GfxRuntimeLibrary.cpp @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GfxRuntimeLibrary.cpp + * @brief VKGC source file: contains the implementation of the function that returns the advanced blend runtime library + *********************************************************************************************************************** + */ +#include "gfxruntime/GfxRuntimeLibrary.h" + +namespace Vkgc { + +#include "g_AdvancedBlendLibrary_spv.h" + +std::pair GetAdvancedBlendLibrary() { + return {sizeof(AdvancedBlendLibrary), AdvancedBlendLibrary}; +} + +} // namespace Vkgc diff --git a/gfxruntime/src/shaders/AdvancedBlend.hlsl b/gfxruntime/src/shaders/AdvancedBlend.hlsl new file mode 100644 index 0000000000..872f01b360 --- /dev/null +++ b/gfxruntime/src/shaders/AdvancedBlend.hlsl @@ -0,0 +1,355 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// HLSL file for the Advanced Blend Equations Shader Runtime Library (SRL). +#ifndef __ADVANCED_BLEND_HLSL_H__ +#define __ADVANCED_BLEND_HLSL_H__ + +// These DUMMY_*_FUNC postfix stubs must be included at the end of every driver +// stub (AmdExt*) declaration to work around a Vulkan glslang issue where the +// compiler can't deal with calls to functions that don't have bodies. 
+// clang-format off +#if defined(AMD_VULKAN) +#define DUMMY_VOID_FUNC { } +#define DUMMY_INT_FUNC { return 0; } +#define DUMMY_INT2_FUNC { return int2(0, 0); } +#define DUMMY_FLOAT4_FUNC { return float4(0.0f, 0.0f, 0.0f, 0.0f); } +#else +#define DUMMY_VOID_FUNC ; +#define DUMMY_INT_FUNC ; +#define DUMMY_INT2_FUNC ; +#define DUMMY_FLOAT4_FUNC ; +#endif +// clang-format on + +// The following extension functions are general driver intrinsics +// clang-format off +float4 AmdExtFragCoord() DUMMY_FLOAT4_FUNC +int AmdExtSampleId() DUMMY_INT_FUNC + +float4 AmdAdvancedBlendTexelLoad(int4 imageLow, int4 imageHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC +float4 AmdAdvancedBlendTexelLoadFmask(int4 imageMsLow, int4 imageMsHigh, int4 fmaskLow, int4 fmaskHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC + +float4 AmdAdvancedBlendCoherentTexelLoad(float4 color, int2 iCoord, int sampleId) DUMMY_FLOAT4_FUNC +void AmdAdvancedBlendCoherentTexelStore(float4 color, int2 iCoord, int sampleId) DUMMY_VOID_FUNC + // clang-format on + + // clang-format off +enum BlendEquationEnum { + Multiply = 1, + Screen, + Overlay, + Darken, + Lighten, + ColorDodge, + ColorBun, + HardLight, + SoftLight, + Difference, + Exclusion, + HslHue, + HslSaturation, + HslColor, + HslLuminosity +}; +// clang-format on + +float AmdAdvancedBlendMultiply(float srcComponent, float dstComponent) { + return srcComponent * dstComponent; +} + +float AmdAdvancedBlendScreen(float srcComponent, float dstComponent) { + return srcComponent + dstComponent - (srcComponent * dstComponent); +} + +float AmdAdvancedBlendOverlay(float srcComponent, float dstComponent) { + if (dstComponent <= 0.5f) { + return 2.0f * srcComponent * dstComponent; + } else { + return 1.0f - (2.0f * (1.0f - srcComponent) * (1.0f - dstComponent)); + } +} + +float AmdAdvancedBlendDarken(float srcComponent, float dstComponent) { + if (srcComponent < dstComponent) { + return srcComponent; + } else { + return dstComponent; + } +} + +float 
AmdAdvancedBlendLighten(float srcComponent, float dstComponent) { + if (srcComponent < dstComponent) { + return dstComponent; + } else { + return srcComponent; + } +} + +float AmdAdvancedBlendColorDodge(float srcComponent, float dstComponent) { + if (dstComponent <= 0.0f) { + return 0.0f; + } else if (srcComponent >= 1.0f) { + return 1.0f; + } else { + float temp = dstComponent / (1.0f - srcComponent); + if (temp < 1.0f) { + return temp; + } else { + return 1.0f; + } + } +} + +float AmdAdvancedBlendColorBurn(float srcComponent, float dstComponent) { + if (dstComponent >= 1.0f) { + return 1.0f; + } else if (srcComponent <= 0.0f) { + return 0.0f; + } else { + float temp = (1.0f - dstComponent) / srcComponent; + if (temp < 1.0f) { + return 1.0f - temp; + } else { + return 0.0f; + } + } +} + +float AmdAdvancedBlendHardLight(float srcComponent, float dstComponent) { + if (srcComponent <= 0.5f) { + return 2.0f * srcComponent * dstComponent; + } else { + return 1.0f - (2.0f * (1.0f - srcComponent) * (1.0f - dstComponent)); + } +} + +float AmdAdvancedBlendSoftLight(float srcComponent, float dstComponent) { + if (srcComponent <= 0.5f) { + return dstComponent - ((1.0f - (2.0f * srcComponent)) * dstComponent * (1.0f - dstComponent)); + } else if (dstComponent <= 0.25f) { + return dstComponent + + (((2.0f * srcComponent) - 1.0f) * dstComponent * (((16.0f * dstComponent) - 12.0f) * dstComponent + 3.0f)); + } else { + return dstComponent + (2.0f * srcComponent - 1.0f) * (sqrt(dstComponent) - dstComponent); + } +} + +float AmdAdvancedBlendDifference(float srcComponent, float dstComponent) { + return abs(dstComponent - srcComponent); +} + +float AmdAdvancedBlendExclusion(float srcComponent, float dstComponent) { + return srcComponent + dstComponent - (2.0f * srcComponent * dstComponent); +} + +float AmdAdvancedBlendMinv3(float3 c) { + return min(min(c.r, c.g), c.b); +} + +float AmdAdvancedBlendMaxv3(float3 c) { + return max(max(c.r, c.g), c.b); +} + +float 
AmdAdvancedBlendLumv3(float3 c) { + return dot(c, float3(0.30f, 0.59f, 0.11f)); +} + +float AmdAdvancedBlendSatv3(float3 c) { + return AmdAdvancedBlendMaxv3(c) - AmdAdvancedBlendMinv3(c); +} + +float3 AmdAdvancedBlendSetLum(float3 cbase, float3 clum) { + float lbase = AmdAdvancedBlendLumv3(cbase); + float llum = AmdAdvancedBlendLumv3(clum); + float ldiff = llum - lbase; + float3 color = cbase + float3(ldiff, ldiff, ldiff); + float minComponent = AmdAdvancedBlendMinv3(color); + float maxComponent = AmdAdvancedBlendMaxv3(color); + float tempValue; + if (minComponent < 0.0f) { + tempValue = llum / (llum - minComponent); + color.r = (color.r - llum) * tempValue + llum; + color.g = (color.g - llum) * tempValue + llum; + color.b = (color.b - llum) * tempValue + llum; + } else if (maxComponent > 1.0f) { + tempValue = (1.0f - llum) / (maxComponent - llum); + color.r = (color.r - llum) * tempValue + llum; + color.g = (color.g - llum) * tempValue + llum; + color.b = (color.b - llum) * tempValue + llum; + } + return color; +} + +float3 AmdAdvancedBlendSetLumSat(float3 cbase, float3 csat, float3 clum) { + float minbase = AmdAdvancedBlendMinv3(cbase); + float sbase = AmdAdvancedBlendSatv3(cbase); + float ssat = AmdAdvancedBlendSatv3(csat); + float3 color; + if (sbase > 0.0f) { + color = (cbase - minbase) * ssat / sbase; + } else { + color = float3(0.0f, 0.0f, 0.0f); + } + return AmdAdvancedBlendSetLum(color, clum); +} + +float AmdAdvancedBlendDivide(float dividend, float divisor) { + if (dividend == divisor) { + return 1.0f; + } else { + return dividend / divisor; + } +} + +export float4 AmdAdvancedBlendInternal(float4 inColor, int4 imageMsLow, int4 imageMsHigh, int4 imageLow, int4 imageHigh, + int4 fmaskLow, int4 fmaskHigh, int mode, bool isMsaa) { + float4 srcColor = inColor; + if (mode == 0) { + return srcColor; + } + float4 fragCoord = AmdExtFragCoord(); + int2 iCoord = int2(fragCoord.x, fragCoord.y); + float4 dstColor; + if (isMsaa) { + dstColor = 
AmdAdvancedBlendTexelLoadFmask(imageMsLow, imageMsHigh, fmaskLow, fmaskHigh, iCoord, 0); + } else { + dstColor = AmdAdvancedBlendTexelLoad(imageLow, imageHigh, iCoord, 0); + } + // TODO: Uncomment these once ROV is supported in LLPC + // int sampleId = AmdExtSampleId(); + // dstColor = AmdAdvancedBlendCoherentTexelLoad(dstColor, iCoord, sampleId); + + if (srcColor.a == 0.0f) { + srcColor.r = 0.0f; + srcColor.g = 0.0f; + srcColor.b = 0.0f; + } else { + srcColor.r = AmdAdvancedBlendDivide(srcColor.r, srcColor.a); + srcColor.g = AmdAdvancedBlendDivide(srcColor.g, srcColor.a); + srcColor.b = AmdAdvancedBlendDivide(srcColor.b, srcColor.a); + } + if (dstColor.a == 0.0f) { + dstColor.r = 0.0f; + dstColor.g = 0.0f; + dstColor.b = 0.0f; + } else { + dstColor.r = AmdAdvancedBlendDivide(dstColor.r, dstColor.a); + dstColor.g = AmdAdvancedBlendDivide(dstColor.g, dstColor.a); + dstColor.b = AmdAdvancedBlendDivide(dstColor.b, dstColor.a); + } + float p0 = srcColor.a * dstColor.a; + float p1 = srcColor.a * (1.0f - dstColor.a); + float p2 = (1.0f - srcColor.a) * dstColor.a; + + float4 blendingOutput; + blendingOutput.r = (srcColor.r * p1) + (dstColor.r * p2); + blendingOutput.g = (srcColor.g * p1) + (dstColor.g * p2); + blendingOutput.b = (srcColor.b * p1) + (dstColor.b * p2); + blendingOutput.a = p0 + p1 + p2; + + float3 tempColor; + float3 cs = float3(srcColor.r, srcColor.g, srcColor.b); + float3 cd = float3(dstColor.r, dstColor.g, dstColor.b); + switch (mode) { + case Multiply: + tempColor.r = AmdAdvancedBlendMultiply(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendMultiply(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendMultiply(srcColor.b, dstColor.b); + break; + case Screen: + tempColor.r = AmdAdvancedBlendScreen(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendScreen(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendScreen(srcColor.b, dstColor.b); + break; + case Overlay: + tempColor.r = AmdAdvancedBlendOverlay(srcColor.r, dstColor.r); + 
tempColor.g = AmdAdvancedBlendOverlay(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendOverlay(srcColor.b, dstColor.b); + break; + case Darken: + tempColor.r = AmdAdvancedBlendDarken(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendDarken(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendDarken(srcColor.b, dstColor.b); + break; + case Lighten: + tempColor.r = AmdAdvancedBlendLighten(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendLighten(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendLighten(srcColor.b, dstColor.b); + break; + case ColorDodge: + tempColor.r = AmdAdvancedBlendColorDodge(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendColorDodge(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendColorDodge(srcColor.b, dstColor.b); + break; + case ColorBun: + tempColor.r = AmdAdvancedBlendColorBurn(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendColorBurn(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendColorBurn(srcColor.b, dstColor.b); + break; + case HardLight: + tempColor.r = AmdAdvancedBlendHardLight(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendHardLight(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendHardLight(srcColor.b, dstColor.b); + break; + case SoftLight: + tempColor.r = AmdAdvancedBlendSoftLight(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendSoftLight(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendSoftLight(srcColor.b, dstColor.b); + break; + case Difference: + tempColor.r = AmdAdvancedBlendDifference(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendDifference(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendDifference(srcColor.b, dstColor.b); + break; + case Exclusion: + tempColor.r = AmdAdvancedBlendExclusion(srcColor.r, dstColor.r); + tempColor.g = AmdAdvancedBlendExclusion(srcColor.g, dstColor.g); + tempColor.b = AmdAdvancedBlendExclusion(srcColor.b, dstColor.b); + break; + case HslHue: + tempColor = 
AmdAdvancedBlendSetLumSat(cs, cd, cd); + break; + case HslSaturation: + tempColor = AmdAdvancedBlendSetLumSat(cd, cs, cd); + break; + case HslColor: + tempColor = AmdAdvancedBlendSetLum(cs, cd); + break; + case HslLuminosity: + tempColor = AmdAdvancedBlendSetLum(cd, cs); + break; + default: + break; + } + + blendingOutput.r += tempColor.r * p0; + blendingOutput.g += tempColor.g * p0; + blendingOutput.b += tempColor.b * p0; + // AmdAdvancedBlendCoherentTexelStore(blendingOutput, iCoord, sampleId); + return blendingOutput; +} + +#endif diff --git a/gfxruntime/tools/CompileAdvancedBlendShader.py b/gfxruntime/tools/CompileAdvancedBlendShader.py new file mode 100644 index 0000000000..88b872d9d3 --- /dev/null +++ b/gfxruntime/tools/CompileAdvancedBlendShader.py @@ -0,0 +1,169 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +#********************************************************************************************************************** +# @file CompileAdvancedBlendShader.py +# @brief A script file to automate the creation of Advanced Blend shader runtime library. +#********************************************************************************************************************** + +"""A script file to automate the creation of Advanced Blend shader runtime library.""" + +import argparse +import os +import subprocess +import sys +import struct +import shutil + +FILE_STANDARD_HEADER = """ +/* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. */ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +// +// This code has been generated automatically. Do not hand-modify this code. +// +// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +""" + +FILE_HEADER_FORMAT_STRING = "static constexpr unsigned AdvancedBlendLibrary[] =\n{\n" +FILE_FOOTER_STRING = "\n};\n" + +SHADER_FILE_NAME = "AdvancedBlend.hlsl" +OUTPUT_FILE_NAME = "AdvancedBlendLibrary" +DXC_EXECUTABLE = "dxc" + +def FixExePath(exePath): + if os.path.exists(exePath + ".exe"): + # Use local Windows Path + return exePath + ".exe" + elif not os.path.exists(exePath): + # Use system PATH on Linux + return os.path.basename(exePath) + return exePath + +def FixInputPath(path) -> str: + return os.path.abspath(path).replace('\\\\', '\\').replace('\\', '/') + +def RemoveFile(path): + # Workaround for file handles sometimes not closing correctly when just using os.remove() + tmpPath = path + "tmp" + os.rename(path, tmpPath) + os.remove(tmpPath) + +def RemoveFolder(path): + shutil.rmtree(path) + +def RunSpirv(compilerPath, inputHlslFile, compiledSpvFile): + commandArgs = [ + compilerPath, + '-T', 'lib_6_3', + '-spirv', + '-fspv-target-env=universal1.5', + '-fvk-use-scalar-layout', + '-Od', + '-Vd', + '-HV', '2021', + '-D', 'AMD_VULKAN=1', + '-Fo', compiledSpvFile, + inputHlslFile + ] + # Ensure the following code is executed from the script's directory. 
+ os.chdir(os.path.dirname(__file__)) + + result = subprocess.run(commandArgs, check=False) + print(' '.join(commandArgs)) + if result.returncode != 0: + return False + return True + +def ConvertSpvFile(compiledSpvFile, outputFile): + try: + spvBinaryFile = open(compiledSpvFile, "rb") + spvBinData = spvBinaryFile.read() + spvBinaryFile.close() + i = 0 + spvHexText = "" + while i < len(spvBinData): + binWord = spvBinData[i:i+4] + intWord = struct.unpack('I', binWord)[0] + hexWord = "{0:#010x}".format(intWord) + spvHexText += hexWord + + i += 4 + + if (i != len(spvBinData)): + spvHexText += "," + if (i % 32 == 0): + spvHexText += "\n" + else: + spvHexText += " " + + outputFile = open(outputFile, "w") + outputFile.write(FILE_STANDARD_HEADER) + outputFile.write(FILE_HEADER_FORMAT_STRING) + outputFile.write(spvHexText) + outputFile.write(FILE_FOOTER_STRING) + outputFile.close() + return True + except Exception as e: + return False + +def main(): + result = 0 + parser = argparse.ArgumentParser(description='Helper script to compile Advanced Blend runtime library.') + parser.add_argument('--compilerPath', help='Path to SPIR-V compiler', default='dxc') + parser.add_argument('--shaderDir', help='path to the source shader', default=None) + parser.add_argument('--outputDir', help='Output directory for compiled shaders', default=None) + + args = parser.parse_args() + compilerPath = FixInputPath(args.compilerPath) + '/' + DXC_EXECUTABLE + compilerPath = FixExePath(compilerPath) + shaderInputDir = FixInputPath(args.shaderDir) + outputDir = FixInputPath(args.outputDir) + + tempDirPath = outputDir + '/' + 'tmp' + # Get rid of temp dir that can be left over from an unclean build. 
+ if os.path.exists(tempDirPath): + RemoveFolder(tempDirPath) + os.mkdir(tempDirPath) + + compiledSpvFile = tempDirPath + '/' + OUTPUT_FILE_NAME + '.spv' + inputHlslFile = shaderInputDir + '/' + SHADER_FILE_NAME + result = RunSpirv(compilerPath, inputHlslFile, compiledSpvFile) + if result : + os.chdir(outputDir) + outputFile = outputDir + '/' + 'g_' + OUTPUT_FILE_NAME + '_spv.h' + result = ConvertSpvFile(compiledSpvFile, outputFile) + + RemoveFile(compiledSpvFile) + RemoveFolder(tempDirPath) + return 0 if result else 1 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 8be6f50d83..2ffe7fdec5 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -226,7 +226,8 @@ enum InternalBinding : unsigned { PrintfBufferBindingId = 6, ///< Binding ID of internal buffer for debug printf ReverseThreadGroupControlBinding = 7, ///< Binding ID of internal buffer for reverseThreadGroup RtCaptureReplayInternalBufferBinding = 8, ///< Binding ID of ray-tracing capture replay internal buffer - SpecConstInternalBufferBindingId = 9, ///< Binding ID of internal buffer for specialized constant. + PixelOpInternalBinding = 9, ///< Binding ID of pixel operand image buffer. + SpecConstInternalBufferBindingId = 10, ///< Binding ID of internal buffer for specialized constant. SpecConstInternalBufferBindingIdEnd = SpecConstInternalBufferBindingId + ShaderStageCount, ConstantBuffer0Binding = 24, ///< Binding ID of default uniform block ConstantBuffer0BindingEnd = ConstantBuffer0Binding + ShaderStageGfxCount, @@ -283,6 +284,14 @@ enum GlCompatibilityUniformLocation : unsigned { UniformLocationCount = ClipPlane + GlCompatibilityLimits::MaxClipPlanes - GlCompatibilityLimits::MaxUniformLocations }; +/// Represents how to enable/disable DrawPixels related patches. 
+enum GlCompatibilityDrawPixelsType : unsigned { + DrawPixelsTypeNone, ///< Disable all DrawPixels related patch + DrawPixelsTypeColor, ///< Enable DrawPixels for color + DrawPixelsTypeDepth, ///< Enable DrawPixels for depth + DrawPixelsTypeStencil, ///< Enable DrawPixels for stencil +}; + /// Enumerates the function of a particular node in a shader's resource mapping graph. enum class ResourceMappingNodeType : unsigned { Unknown, ///< Invalid type @@ -499,6 +508,7 @@ struct ResourceNodeData { unsigned location; ///< ID of resource location bool mergedLocationBinding; ///< TRUE if location and binding are merged in spirv binary unsigned isTexelBuffer; ///< TRUE if it is ImageBuffer or TextureBuffer + unsigned isTexelFetchUsed; ///< TRUE if texelFetch is used unsigned isDefaultUniformSampler; ///< TRUE if it's sampler image in default uniform struct unsigned columnCount; ///< Column count if this is a matrix variable. BasicType basicType; ///< Type of the variable or element @@ -536,38 +546,39 @@ struct ResourcesNodes { /// Represents usage info of a shader module struct ShaderModuleUsage { - bool enableVarPtrStorageBuf; ///< Whether to enable "VariablePointerStorageBuffer" capability - bool enableVarPtr; ///< Whether to enable "VariablePointer" capability - bool useSubgroupSize; ///< Whether gl_SubgroupSize is used - bool useSpecConstant; ///< Whether specialization constant is used - bool keepUnusedFunctions; ///< Whether to keep unused function - bool enableRayQuery; ///< Whether the "RayQueryKHR" capability is used - bool rayQueryLibrary; ///< Whether the shaderModule is rayQueryLibrary - bool isInternalRtShader; ///< Whether the shaderModule is a GPURT internal shader (e.g. 
BVH build) - bool hasTraceRay; ///< Whether the shaderModule has OpTraceRayKHR - bool hasExecuteCallable; ///< Whether the shaderModule has OpExecuteCallableKHR - bool useIsNan; ///< Whether IsNan is used - bool useInvariant; ///< Whether invariant variable is used - bool usePointSize; ///< Whether gl_PointSize is used in output - bool useShadingRate; ///< Whether shading rate is used - bool useSampleInfo; ///< Whether gl_SamplePosition or InterpolateAtSample are used - bool useClipVertex; ///< Whether gl_ClipVertex is used - bool useFrontColor; ///< Whether gl_FrontColor is used - bool useBackColor; ///< Whether gl_BackColor is used - bool useFrontSecondaryColor; ///< Whether gl_FrontSecondaryColor is used - bool useBackSecondaryColor; ///< Whether gl_BackSecondaryColor is used - ResourcesNodes *pResources; ///< Resource node for buffers and opaque types - bool useFragCoord; ///< Whether gl_FragCoord is used - bool originUpperLeft; ///< Whether pixel origin is upper-left - bool pixelCenterInteger; ///< Whether pixel coord is Integer - bool useGenericBuiltIn; ///< Whether to use builtIn inputs that include gl_PointCoord, gl_PrimitiveId, - /// gl_Layer, gl_ClipDistance or gl_CullDistance. 
- bool enableXfb; ///< Whether transform feedback is enabled - unsigned localSizeX; ///< Compute shader work-group size in the X dimension - unsigned localSizeY; ///< Compute shader work-group size in the Y dimension - unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension - bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration - bool disableDualSource; ///< Whether disable dualSource blend + bool enableVarPtrStorageBuf; ///< Whether to enable "VariablePointerStorageBuffer" capability + bool enableVarPtr; ///< Whether to enable "VariablePointer" capability + bool useSubgroupSize; ///< Whether gl_SubgroupSize is used + bool useSpecConstant; ///< Whether specialization constant is used + bool keepUnusedFunctions; ///< Whether to keep unused function + bool enableRayQuery; ///< Whether the "RayQueryKHR" capability is used + bool rayQueryLibrary; ///< Whether the shaderModule is rayQueryLibrary + bool isInternalRtShader; ///< Whether the shaderModule is a GPURT internal shader (e.g. 
BVH build) + bool hasTraceRay; ///< Whether the shaderModule has OpTraceRayKHR + bool hasExecuteCallable; ///< Whether the shaderModule has OpExecuteCallableKHR + bool useIsNan; ///< Whether IsNan is used + bool useInvariant; ///< Whether invariant variable is used + bool usePointSize; ///< Whether gl_PointSize is used in output + bool useShadingRate; ///< Whether shading rate is used + bool useSampleInfo; ///< Whether gl_SamplePosition or InterpolateAtSample are used + bool useClipVertex; ///< Whether gl_ClipVertex is used + bool useFrontColor; ///< Whether gl_FrontColor is used + bool useBackColor; ///< Whether gl_BackColor is used + bool useFrontSecondaryColor; ///< Whether gl_FrontSecondaryColor is used + bool useBackSecondaryColor; ///< Whether gl_BackSecondaryColor is used + ResourcesNodes *pResources; ///< Resource node for buffers and opaque types + bool useFragCoord; ///< Whether gl_FragCoord is used + bool originUpperLeft; ///< Whether pixel origin is upper-left + bool pixelCenterInteger; ///< Whether pixel coord is Integer + bool useGenericBuiltIn; ///< Whether to use builtIn inputs that include gl_PointCoord, gl_PrimitiveId, + /// gl_Layer, gl_ClipDistance or gl_CullDistance. 
+ bool enableXfb; ///< Whether transform feedback is enabled + unsigned localSizeX; ///< Compute shader work-group size in the X dimension + unsigned localSizeY; ///< Compute shader work-group size in the Y dimension + unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension + bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration + bool disableDualSource; ///< Whether disable dualSource blend + uint32_t clipDistanceArraySize; ///< Count of output clip distance }; /// Represents common part of shader module data @@ -1168,6 +1179,11 @@ struct TessellationLevel { float outer[4]; ///< Outer tessellation level }; +struct AdvancedBlendInfo { + bool enableAdvancedBlend; ///< Whether enable advanced blending + unsigned binding; ///< The binding point of the texture resource attached to the framebuffer +}; + /// Represents info to build a graphics pipeline. struct GraphicsPipelineBuildInfo { void *pInstance; ///< Vulkan instance object @@ -1262,17 +1278,24 @@ struct GraphicsPipelineBuildInfo { const auto &getGlState() const { return *this; } #else struct { - bool originUpperLeft; ///< Whether origin coordinate of framebuffer is upper-left. - unsigned numUniformConstantMaps; ///< Number of uniform constant maps - UniformConstantMap **ppUniformMaps; ///< Pointers to array of pointers for the uniform constant map. - ApiXfbOutData apiXfbOutData; ///< Transform feedback data specified by API interface. - bool vbAddressLowBitsKnown; ///< Whether vbAddressLowBits is valid - uint8_t vbAddressLowBits[MaxVertexBindings]; ///< Lowest two bits of vertex buffer addresses + bool originUpperLeft; ///< Whether origin coordinate of framebuffer is upper-left. 
+ bool vbAddressLowBitsKnown; ///< Whether vbAddressLowBits is valid + bool enableBitmap; ///< Whether enable Bitmap patch + bool enableBitmapLsb; ///< Whether enable LSB as first bit in bitmap resource + bool enableTwoSideLighting; ///< Whether enable two-side lighting + unsigned numUniformConstantMaps; ///< Number of uniform constant maps + UniformConstantMap **ppUniformMaps; ///< Pointers to array of pointers for the uniform constant map. + ApiXfbOutData apiXfbOutData; ///< Transform feedback data specified by API interface. + GlCompatibilityDrawPixelsType drawPixelsType; ///< DrawPixel type. + uint8_t vbAddressLowBits[MaxVertexBindings]; ///< Lowest two bits of vertex buffer addresses + float pixelTransferScale[4]; ///< Scale apply to render color target + float pixelTransferBias[4]; ///< Bias apply to render color target } glState; const auto &getGlState() const { return glState; } #endif - const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be stored inside the ELF - size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data + const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be stored inside the ELF + size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data + AdvancedBlendInfo advancedBlendInfo; ///< The info of advanced blend }; /// Represents info to build a compute pipeline. @@ -1442,6 +1465,12 @@ class IPipelineDumper { /// @param [in] pPipelineBin Pipeline binary (ELF) static void VKAPI_CALL DumpPipelineBinary(void *pDumpFile, GfxIpVersion gfxIp, const BinaryData *pPipelineBin); + /// Dump graphics stage library file name. + /// + /// @param [in] pDumpFile The handle of pipeline dump file + /// @param [in] libFileNames File name array of size three + static void VKAPI_CALL DumpGraphicsLibraryFileName(void *pDumpFile, const char **libFileNames); + /// Dump extra info to pipeline file. 
/// /// @param [in] pDumpFile The handle of pipeline dump file diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index b7a8cb97cc..e7feab6d76 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -141,8 +141,6 @@ target_sources(LLVMlgc PRIVATE patch/ConfigBuilderBase.cpp patch/Continufy.cpp patch/FragColorExport.cpp - patch/Gfx9Chip.cpp - patch/Gfx9ConfigBuilder.cpp patch/LowerDebugPrintf.cpp patch/LowerDesc.cpp patch/LowerSubgroupOps.cpp diff --git a/lgc/builder/ArithBuilder.cpp b/lgc/builder/ArithBuilder.cpp index 14efa07468..722c224231 100644 --- a/lgc/builder/ArithBuilder.cpp +++ b/lgc/builder/ArithBuilder.cpp @@ -941,10 +941,12 @@ Value *BuilderImpl::CreateFClamp(Value *x, Value *minVal, Value *maxVal, const T result->setName(instName); } else { // For double, use a combination of fmin and fmax. - CallInst *max = CreateMaxNum(x, minVal); - max->setFastMathFlags(getFastMathFlags()); - CallInst *min = CreateMinNum(max, maxVal, instName); - min->setFastMathFlags(getFastMathFlags()); + Value *max = CreateMaxNum(x, minVal); + if (auto *call = dyn_cast(max)) + call->setFastMathFlags(getFastMathFlags()); + Value *min = CreateMinNum(max, maxVal, instName); + if (auto *call = dyn_cast(min)) + call->setFastMathFlags(getFastMathFlags()); result = min; } @@ -961,12 +963,12 @@ Value *BuilderImpl::CreateFClamp(Value *x, Value *minVal, Value *maxVal, const T // @param value2 : Second value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFMin(Value *value1, Value *value2, const Twine &instName) { - CallInst *min = CreateMinNum(value1, value2); - min->setFastMathFlags(getFastMathFlags()); - Value *result = min; + Value *min = CreateMinNum(value1, value2); + if (auto *call = dyn_cast(min)) + call->setFastMathFlags(getFastMathFlags()); - result->setName(instName); - return result; + min->setName(instName); + return min; } // ===================================================================================================================== @@ 
-978,12 +980,12 @@ Value *BuilderImpl::CreateFMin(Value *value1, Value *value2, const Twine &instNa // @param value2 : Second value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFMax(Value *value1, Value *value2, const Twine &instName) { - CallInst *max = CreateMaxNum(value1, value2); - max->setFastMathFlags(getFastMathFlags()); - Value *result = max; + Value *max = CreateMaxNum(value1, value2); + if (auto *call = dyn_cast(max)) + call->setFastMathFlags(getFastMathFlags()); - result->setName(instName); - return result; + max->setName(instName); + return max; } // ===================================================================================================================== @@ -996,14 +998,16 @@ Value *BuilderImpl::CreateFMax(Value *value1, Value *value2, const Twine &instNa // @param value3 : Third value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFMin3(Value *value1, Value *value2, Value *value3, const Twine &instName) { - CallInst *min1 = CreateMinNum(value1, value2); - min1->setFastMathFlags(getFastMathFlags()); - CallInst *min2 = CreateMinNum(min1, value3); - min2->setFastMathFlags(getFastMathFlags()); - Value *result = min2; + Value *min1 = CreateMinNum(value1, value2); + if (auto *call = dyn_cast(min1)) + call->setFastMathFlags(getFastMathFlags()); - result->setName(instName); - return result; + Value *min2 = CreateMinNum(min1, value3); + if (auto *call = dyn_cast(min2)) + call->setFastMathFlags(getFastMathFlags()); + + min2->setName(instName); + return min2; } // ===================================================================================================================== @@ -1016,14 +1020,16 @@ Value *BuilderImpl::CreateFMin3(Value *value1, Value *value2, Value *value3, con // @param value3 : Third value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateFMax3(Value *value1, Value *value2, Value *value3, const Twine &instName) { - CallInst *max1 = 
CreateMaxNum(value1, value2); - max1->setFastMathFlags(getFastMathFlags()); - CallInst *max2 = CreateMaxNum(max1, value3); - max2->setFastMathFlags(getFastMathFlags()); - Value *result = max2; + Value *max1 = CreateMaxNum(value1, value2); + if (auto *call = dyn_cast(max1)) + call->setFastMathFlags(getFastMathFlags()); - result->setName(instName); - return result; + Value *max2 = CreateMaxNum(max1, value3); + if (auto *call = dyn_cast(max2)) + call->setFastMathFlags(getFastMathFlags()); + + max2->setName(instName); + return max2; } // ===================================================================================================================== @@ -1046,14 +1052,21 @@ Value *BuilderImpl::CreateFMid3(Value *value1, Value *value2, Value *value3, con }); } else { // For double, use a combination of fmin and fmax. - CallInst *min1 = CreateMinNum(value1, value2); - min1->setFastMathFlags(getFastMathFlags()); - CallInst *max1 = CreateMaxNum(value1, value2); - max1->setFastMathFlags(getFastMathFlags()); - CallInst *min2 = CreateMinNum(max1, value3); - min2->setFastMathFlags(getFastMathFlags()); - CallInst *max2 = CreateMaxNum(min1, min2, instName); - max2->setFastMathFlags(getFastMathFlags()); + Value *min1 = CreateMinNum(value1, value2); + if (auto *call = dyn_cast(min1)) + call->setFastMathFlags(getFastMathFlags()); + + Value *max1 = CreateMaxNum(value1, value2); + if (auto *call = dyn_cast(max1)) + call->setFastMathFlags(getFastMathFlags()); + + Value *min2 = CreateMinNum(max1, value3); + if (auto *call = dyn_cast(min2)) + call->setFastMathFlags(getFastMathFlags()); + + Value *max2 = CreateMaxNum(min1, min2, instName); + if (auto *call = dyn_cast(max2)) + call->setFastMathFlags(getFastMathFlags()); result = max2; } diff --git a/lgc/builder/Builder.cpp b/lgc/builder/Builder.cpp index e2312194f6..fa9b9214e7 100644 --- a/lgc/builder/Builder.cpp +++ b/lgc/builder/Builder.cpp @@ -301,11 +301,13 @@ CallInst *Builder::CreateUnaryIntrinsic(Intrinsic::ID id, Value *value, 
Instruct // @param value2 : Input value 2 // @param fmfSource : Instruction to copy fast math flags from; nullptr to get from Builder // @param name : Name to give instruction -CallInst *Builder::CreateBinaryIntrinsic(Intrinsic::ID id, Value *value1, Value *value2, Instruction *fmfSource, - const Twine &name) { - CallInst *result = IRBuilder<>::CreateBinaryIntrinsic(id, value1, value2, fmfSource, name); - if (!fmfSource && isa(result)) - result->setFastMathFlags(getFastMathFlags()); +Value *Builder::CreateBinaryIntrinsic(Intrinsic::ID id, Value *value1, Value *value2, Instruction *fmfSource, + const Twine &name) { + Value *result = IRBuilder<>::CreateBinaryIntrinsic(id, value1, value2, fmfSource, name); + if (auto *intr = dyn_cast(result)) { + if (!fmfSource && isa(intr)) + intr->setFastMathFlags(getFastMathFlags()); + } return result; } diff --git a/lgc/builder/BuilderImpl.cpp b/lgc/builder/BuilderImpl.cpp index 1ee82a9d57..3e88482491 100644 --- a/lgc/builder/BuilderImpl.cpp +++ b/lgc/builder/BuilderImpl.cpp @@ -245,12 +245,6 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu return computedResult; } -// ===================================================================================================================== -// Get whether the context we are building in supports DPP ROW_XMASK operations. -bool BuilderImpl::supportDppRowXmask() const { - return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 10; -} - // ===================================================================================================================== // Get whether the context we are building in support the bpermute operation. 
bool BuilderImpl::supportWaveWideBPermute() const { @@ -258,16 +252,10 @@ bool BuilderImpl::supportWaveWideBPermute() const { auto supportBPermute = gfxIp == 8 || gfxIp == 9; auto shaderStage = getShaderStage(GetInsertBlock()->getParent()); auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage.value()); - supportBPermute = supportBPermute || (gfxIp >= 10 && waveSize == 32); + supportBPermute = supportBPermute || waveSize == 32; return supportBPermute; } -// ===================================================================================================================== -// Get whether the context we are building in supports permute lane DPP operations. -bool BuilderImpl::supportPermLaneDpp() const { - return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 10; -} - // ===================================================================================================================== // Get whether the context we are building in supports permute lane 64 DPP operations. 
bool BuilderImpl::supportPermLane64Dpp() const { diff --git a/lgc/builder/BuilderRecorder.cpp b/lgc/builder/BuilderRecorder.cpp index 758612329c..92e78a02ab 100644 --- a/lgc/builder/BuilderRecorder.cpp +++ b/lgc/builder/BuilderRecorder.cpp @@ -220,8 +220,6 @@ StringRef BuilderRecorder::getCallName(BuilderOpcode opcode) { return "barrier"; case BuilderOpcode::Kill: return "kill"; - case BuilderOpcode::DebugBreak: - return "debug.break"; case BuilderOpcode::ReadClock: return "read.clock"; case BuilderOpcode::Derivative: @@ -254,6 +252,8 @@ StringRef BuilderRecorder::getCallName(BuilderOpcode opcode) { return "image.query.size"; case BuilderOpcode::ImageGetLod: return "image.get.lod"; + case BuilderOpcode::ImageGetSamplePosition: + return "image.get.sample.position"; case BuilderOpcode::ImageBvhIntersectRay: return "image.bvh.intersect.ray"; case BuilderOpcode::GetWaveSize: @@ -494,14 +494,6 @@ Instruction *Builder::CreateReadClock(bool realtime, const Twine &instName) { return record(BuilderOpcode::ReadClock, getInt64Ty(), getInt1(realtime), instName); } -// ===================================================================================================================== -// Create a "debug break halt" -// -// @param instName : Name to give final instruction -Instruction *Builder::CreateDebugBreak(const Twine &instName) { - return record(BuilderOpcode::DebugBreak, getVoidTy(), {}, instName); -} - // ===================================================================================================================== // Create tan operation // @@ -1353,6 +1345,20 @@ Value *Builder::CreateImageGetLod(unsigned dim, unsigned flags, Value *imageDesc {getInt32(dim), getInt32(flags), imageDesc, samplerDesc, coord}, instName); } +// ===================================================================================================================== +// Create a query of the sample position of given sample id in an image. Returns an v2f32 value. 
+// +// @param dim : Image dimension +// @param flags : ImageFlag* flags +// @param imageDesc : Image descriptor or texel buffer descriptor +// @param sampleId : Sample ID +// @param instName : Name to give instruction(s) +Value *Builder::CreateImageGetSamplePosition(unsigned dim, unsigned flags, Value *imageDesc, Value *sampleId, + const llvm::Twine &instName) { + return record(BuilderOpcode::ImageGetSamplePosition, FixedVectorType::get(getFloatTy(), 2), + {getInt32(dim), getInt32(flags), imageDesc, sampleId}, instName); +} + // ===================================================================================================================== // Create a read of (part of) a user input value, passed from the previous shader stage. // @@ -2138,13 +2144,13 @@ Instruction *Builder::record(BuilderOpcode opcode, Type *resultTy, ArrayRefgetTargetInfo().getGfxIpVersion().major >= 10) { - // NOTE: The sub-attribute 'wavefrontsize' of 'target-features' is set in advance to let optimization - // pass know we are in which wavesize mode. - auto shaderStage = lgc::getShaderStage(&func); - if (shaderStage) { - unsigned waveSize = pipelineState->getShaderWaveSize(shaderStage.value()); - func.addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); - } + // NOTE: The sub-attribute 'wavefrontsize' of 'target-features' is set in advance to let optimization + // pass know we are in which wavesize mode. 
+ auto shaderStage = lgc::getShaderStage(&func); + if (shaderStage) { + unsigned waveSize = pipelineState->getShaderWaveSize(shaderStage.value()); + func.addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); } continue; @@ -547,6 +545,14 @@ Value *BuilderReplayer::processCall(unsigned opcode, CallInst *call) { return m_builder->CreateImageGetLod(dim, flags, imageDesc, samplerDesc, coord); } + case BuilderOpcode::ImageGetSamplePosition: { + unsigned dim = cast(args[0])->getZExtValue(); + unsigned flags = cast(args[1])->getZExtValue(); + Value *imageDesc = args[2]; + Value *sampleId = args[3]; + return m_builder->CreateImageGetSamplePosition(dim, flags, imageDesc, sampleId); + } + // Replayer implementations of InOutBuilder methods case BuilderOpcode::ReadGenericInput: { InOutInfo inputInfo(cast(args[4])->getZExtValue()); @@ -675,9 +681,6 @@ Value *BuilderReplayer::processCall(unsigned opcode, CallInst *call) { case BuilderOpcode::IsHelperInvocation: { return m_builder->CreateIsHelperInvocation(); } - case BuilderOpcode::DebugBreak: { - return m_builder->CreateDebugBreak(); - } case BuilderOpcode::TransposeMatrix: { return m_builder->CreateTransposeMatrix(args[0]); } diff --git a/lgc/builder/DescBuilder.cpp b/lgc/builder/DescBuilder.cpp index ed3d7a35c0..57f45587a4 100644 --- a/lgc/builder/DescBuilder.cpp +++ b/lgc/builder/DescBuilder.cpp @@ -54,9 +54,10 @@ using namespace llvm; // @param binding : Descriptor binding // @param descIndex : Descriptor index // @param flags : BufferFlag* bit settings +// @param stride : stride for index mode access // @param instName : Name to give instruction(s) -Value *BuilderImpl::CreateBufferDesc(uint64_t descSet, unsigned binding, Value *descIndex, unsigned flags, - const Twine &instName) { +Value *BuilderImpl::createBufferDesc(uint64_t descSet, unsigned binding, Value *descIndex, unsigned flags, + unsigned stride, const Twine &instName) { Value *desc = nullptr; bool return64Address = false; descIndex = 
scalarizeIfUniform(descIndex, flags & BufferFlagNonUniform); @@ -117,7 +118,7 @@ Value *BuilderImpl::CreateBufferDesc(uint64_t descSet, unsigned binding, Value * Value *descPtr = getDescPtr(node->concreteType, topNode, node, binding); if (return64Address) return descPtr; - desc = buildInlineBufferDesc(descPtr); + desc = buildInlineBufferDesc(descPtr, stride); } else { ResourceNodeType resType = node->concreteType; ResourceNodeType abstractType = node->abstractType; @@ -165,8 +166,9 @@ Value *BuilderImpl::CreateBufferDesc(uint64_t descSet, unsigned binding, Value * } if (node && (node->concreteType == ResourceNodeType::DescriptorBufferCompact || - node->concreteType == ResourceNodeType::DescriptorConstBufferCompact)) - desc = buildBufferCompactDesc(desc); + node->concreteType == ResourceNodeType::DescriptorConstBufferCompact)) { + desc = buildBufferCompactDesc(desc, stride); + } if (!instName.isTriviallyEmpty()) desc->setName(instName); @@ -369,19 +371,21 @@ Value *BuilderImpl::scalarizeIfUniform(Value *value, bool isNonUniform) { // Calculate a buffer descriptor for an inline buffer // // @param descPtr : Pointer to inline buffer -Value *BuilderImpl::buildInlineBufferDesc(Value *descPtr) { +// @param stride : stride for the buffer descriptor to access in index mode +Value *BuilderImpl::buildInlineBufferDesc(Value *descPtr, unsigned stride) { // Bitcast the pointer to v2i32 descPtr = CreatePtrToInt(descPtr, getInt64Ty()); descPtr = CreateBitCast(descPtr, FixedVectorType::get(getInt32Ty(), 2)); - return buildBufferCompactDesc(descPtr); + return buildBufferCompactDesc(descPtr, stride); } // ===================================================================================================================== // Build buffer compact descriptor // // @param desc : The buffer descriptor base to build for the buffer compact descriptor -Value *BuilderImpl::buildBufferCompactDesc(Value *desc) { +// @param stride : stride for the buffer descriptor to access in index mode 
+Value *BuilderImpl::buildBufferCompactDesc(Value *desc, unsigned stride) { const GfxIpVersion gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); // Extract compact buffer descriptor @@ -397,6 +401,11 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc) { SqBufRsrcWord1 sqBufRsrcWord1 = {}; sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX; descElem1 = CreateAnd(descElem1, getInt32(sqBufRsrcWord1.u32All)); + if (stride) { + SqBufRsrcWord1 sqBufRsrcWord1Stride = {}; + sqBufRsrcWord1Stride.bits.stride = stride; + descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All)); + } bufDesc = CreateInsertElement(bufDesc, descElem1, 1); // Dword 2 @@ -413,12 +422,12 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc) { if (gfxIp.major == 10) { sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; sqBufRsrcWord3.gfx10.resourceLevel = 1; - sqBufRsrcWord3.gfx10.oobSelect = 2; - assert(sqBufRsrcWord3.u32All == 0x21014FAC); + sqBufRsrcWord3.gfx10.oobSelect = stride ? 3 : 2; + assert(sqBufRsrcWord3.u32All == 0x21014FAC || sqBufRsrcWord3.u32All == 0x31014FAC); } else if (gfxIp.major >= 11) { sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; - sqBufRsrcWord3.gfx11.oobSelect = 2; - assert(sqBufRsrcWord3.u32All == 0x20014FAC); + sqBufRsrcWord3.gfx11.oobSelect = stride ? 
3 : 2; + assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC); } else { llvm_unreachable("Not implemented!"); } diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index 6dbc21e119..f0da76d2e8 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -30,6 +30,7 @@ */ #include "YCbCrConverter.h" #include "lgc/LgcContext.h" +#include "lgc/LgcDialect.h" #include "lgc/builder/BuilderImpl.h" #include "lgc/state/TargetInfo.h" #include "lgc/util/Internal.h" @@ -476,8 +477,7 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { if (flags & (ImageFlagCoherent | ImageFlagVolatile)) { coherent.bits.glc = true; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) - coherent.bits.dlc = true; + coherent.bits.dlc = true; } } @@ -1027,8 +1027,7 @@ Value *BuilderImpl::CreateImageSampleGather(Type *resultTy, unsigned dim, unsign if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { if (flags & (ImageFlagCoherent | ImageFlagVolatile)) { coherent.bits.glc = true; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) - coherent.bits.dlc = true; + coherent.bits.dlc = true; } } @@ -1266,6 +1265,18 @@ Value *BuilderImpl::CreateImageQuerySamples(unsigned dim, unsigned flags, Value Value *isNullDesc = CreateICmpEQ(descWord3, getInt32(0)); sampleNumber = CreateSelect(isNullDesc, getInt32(0), sampleNumber); } + + if (flags & ImageFlagSamplePatternOffset) { + Value *descWord6 = CreateExtractElement(imageDesc, 6); + // sample pattern index is bit203-206, which is bit11-14 of word 6. + Value *samplePatternOffset = + CreateIntrinsic(Intrinsic::amdgcn_ubfe, getInt32Ty(), {descWord6, getInt32(11), getInt32(4)}); + // This is offset in entries, and now it is converted to offset in dwords (each entry has 16 dwords). 
+ samplePatternOffset = CreateMul(samplePatternOffset, getInt32(16)); + // Or the offset to the high 16 bit of sampleNumber. + sampleNumber = CreateOr(CreateShl(samplePatternOffset, 16), sampleNumber); + } + return sampleNumber; } @@ -1316,15 +1327,11 @@ Value *BuilderImpl::CreateImageQuerySize(unsigned dim, unsigned flags, Value *im Value *mipDepth = CreateLShr(depth, curLevel); mipDepth = CreateSelect(CreateICmpEQ(mipDepth, getInt32(0)), getInt32(1), mipDepth); - if (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 10) { - Value *arrayPitch = proxySqRsrcRegHelper.getReg(SqRsrcRegs::ArrayPitch); - Value *baseArray = proxySqRsrcRegHelper.getReg(SqRsrcRegs::BaseArray); - Value *sliceDepth = CreateSub(depth, baseArray); - Value *isSlice = CreateTrunc(arrayPitch, getInt1Ty()); - depth = CreateSelect(isSlice, sliceDepth, mipDepth); - } else { - depth = mipDepth; - } + Value *arrayPitch = proxySqRsrcRegHelper.getReg(SqRsrcRegs::ArrayPitch); + Value *baseArray = proxySqRsrcRegHelper.getReg(SqRsrcRegs::BaseArray); + Value *sliceDepth = CreateSub(depth, baseArray); + Value *isSlice = CreateTrunc(arrayPitch, getInt1Ty()); + depth = CreateSelect(isSlice, sliceDepth, mipDepth); } // Set to 0 if allowNullDescriptor is on and image descriptor is a null descriptor @@ -1437,6 +1444,36 @@ Value *BuilderImpl::CreateImageGetLod(unsigned dim, unsigned flags, Value *image return result; } +// ===================================================================================================================== +// Create a query of the sample position of given sample id in an image. Returns an v2f32 value. 
+// +// @param dim : Image dimension +// @param flags : ImageFlag* flags +// @param imageDesc : Image descriptor or texel buffer descriptor +// @param sampleId : Sample ID +// @param instName : Name to give instruction(s) +Value *BuilderImpl::CreateImageGetSamplePosition(unsigned dim, unsigned flags, Value *imageDesc, Value *sampleId, + const Twine &instName) { + // Add ImageFlagSamplePatternOffset to query back both sample count and sample pattern offset. + Value *sampleInfo = CreateImageQuerySamples(dim, flags | ImageFlagSamplePatternOffset, imageDesc); + Value *sampleCount = CreateAnd(sampleInfo, 0xFFFF); + Value *samplePatternOffset = CreateLShr(sampleInfo, 16); + + Value *validOffset = CreateAdd(samplePatternOffset, sampleId); + // offset = (sampleCount > sampleId) ? (samplePatternOffset + sampleId) : 0 + Value *sampleValid = CreateICmpUGT(sampleCount, sampleId); + Value *offset = CreateSelect(sampleValid, validOffset, getInt32(0)); + + // Load sample position descriptor. + Type *descTy = getDescTy(ResourceNodeType::DescriptorBuffer); + Value *desc = create(descTy, SiDrvTableSamplepos); + // Load the value using the descriptor. + offset = CreateShl(offset, getInt32(4)); + + Type *samplePosTy = FixedVectorType::get(getFloatTy(), 2); + return CreateIntrinsic(Intrinsic::amdgcn_raw_buffer_load, samplePosTy, {desc, offset, getInt32(0), getInt32(0)}); +} + // ===================================================================================================================== // Create a ray intersect result with specified node in BVH buffer // diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index e7a34f23d3..0eacb6ca44 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -513,6 +513,31 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Mark usage for interpolation info. 
markInterpolationInfo(inOutInfo); } + + if (isOutput && m_shaderStage == ShaderStage::Mesh) { + // Record number of components for mesh shader outputs + for (unsigned i = 0; i < locationCount; ++i) { + unsigned numComponents = 0; + if (inOutInfo.getNumComponents() > 4) { + assert(locationCount % 2 == 0); // Must have even number of locations for 64-bit data type + assert(inOutInfo.getComponent() == 0); // Start component must be 0 in this case + // NOTE: For 64-bit vec3/vec4 data types, they will occupy two consecutive locations, we only record the number + // of components to the former one and skip the latter one. + if (i % 2 != 0) + continue; + numComponents = inOutInfo.getNumComponents(); + } else { + numComponents = inOutInfo.getComponent() + inOutInfo.getNumComponents(); + } + + if (inOutInfo.isPerPrimitive()) + resUsage->inOutUsage.mesh.primitiveOutputComponents[location + i] = {numComponents, + static_cast(InvalidValue)}; + else + resUsage->inOutUsage.mesh.vertexOutputComponents[location + i] = {numComponents, + static_cast(InvalidValue)}; + } + } } // ===================================================================================================================== diff --git a/lgc/builder/MiscBuilder.cpp b/lgc/builder/MiscBuilder.cpp index 3156266bba..3817508142 100644 --- a/lgc/builder/MiscBuilder.cpp +++ b/lgc/builder/MiscBuilder.cpp @@ -112,7 +112,7 @@ Instruction *BuilderImpl::CreateKill(const Twine &instName) { // Create a "system halt" // // @param instName : Name to give instruction(s) -Instruction *BuilderImpl::CreateDebugBreak(const Twine &instName) { +Instruction *BuilderCommon::CreateDebugBreak(const Twine &instName) { return CreateIntrinsic(Intrinsic::amdgcn_s_sethalt, {}, getInt32(1), nullptr, instName); } diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 84b224e591..6df4b1e5c0 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -79,7 +79,10 @@ unsigned 
BuilderImpl::getShaderWaveSize() { // // @param instName : Name to give final instruction. Value *SubgroupBuilder::CreateSubgroupElect(const Twine &instName) { - return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue()), ""), getInt32(0)); + bool excludeHelperLanes = false; + if (getShaderStage(GetInsertBlock()->getParent()).value() == ShaderStage::Fragment) + excludeHelperLanes = m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes; + return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue(), excludeHelperLanes)), getInt32(0)); } // ===================================================================================================================== @@ -95,7 +98,8 @@ Value *BuilderImpl::CreateSubgroupAll(Value *const value, const Twine &instName) const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { result = CreateZExt(result, getInt32Ty()); - result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); + result = CreateIntrinsic(fragmentMode.waveOpsRequireHelperLanes ? Intrinsic::amdgcn_wqm : Intrinsic::amdgcn_softwqm, + {getInt32Ty()}, {result}); result = CreateTrunc(result, getInt1Ty()); } return result; @@ -107,14 +111,25 @@ Value *BuilderImpl::CreateSubgroupAll(Value *const value, const Twine &instName) // @param value : The value to compare across the subgroup. Must be an integer type. // @param instName : Name to give final instruction. 
Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) { - Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0)); + bool ballotExcludeHelperLanes = false; + bool includeHelperLanes = false; + bool requireHelperLanes = false; + + if (getShaderStage(GetInsertBlock()->getParent()).value() == ShaderStage::Fragment) { + const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); + ballotExcludeHelperLanes = fragmentMode.waveOpsExcludeHelperLanes; + includeHelperLanes = !fragmentMode.waveOpsExcludeHelperLanes; + requireHelperLanes = fragmentMode.waveOpsRequireHelperLanes; + } + + Value *result = CreateICmpNE(createGroupBallot(value, ballotExcludeHelperLanes), getInt64(0)); result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); // Helper invocations of whole quad mode should be included in the subgroup vote execution - const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - if (m_shaderStage == ShaderStage::Fragment && !fragmentMode.waveOpsExcludeHelperLanes) { + if (includeHelperLanes) { result = CreateZExt(result, getInt32Ty()); - result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result}); + result = CreateIntrinsic(requireHelperLanes ? Intrinsic::amdgcn_wqm : Intrinsic::amdgcn_softwqm, {getInt32Ty()}, + {result}); result = CreateTrunc(result, getInt1Ty()); } return result; @@ -400,7 +415,14 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index auto const sameOrOtherHalf = CreateAnd(CreateXor(index, threadId), getInt32(32)); auto const indexInSameHalf = CreateICmpEQ(sameOrOtherHalf, getInt32(0)); - return CreateSelect(indexInSameHalf, bPermSameHalf, bPermOtherHalf); + auto result = CreateSelect(indexInSameHalf, bPermSameHalf, bPermOtherHalf); + + // If required, force inputs of the operation to be computed in WQM. 
+ if (m_shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsRequireHelperLanes) + result = createWqm(result); + + return result; } auto mapFunc = [this](BuilderBase &builder, ArrayRef mappedArgs, @@ -459,7 +481,7 @@ Value *BuilderImpl::CreateSubgroupShuffleXor(Value *const value, Value *const ma break; } - if (!canOptimize && supportDppRowXmask()) { + if (!canOptimize) { canOptimize = true; switch (maskValue) { case 4: @@ -500,7 +522,7 @@ Value *BuilderImpl::CreateSubgroupShuffleXor(Value *const value, Value *const ma if (maskValue < 32) { if (canOptimize) return createDppMov(value, dppCtrl, 0xF, 0xF, true); - if (supportPermLaneDpp() && (maskValue >= 16)) { + if (maskValue >= 16) { static const unsigned LaneSelBits[16][2] = { {0x76543210, 0xfedcba98}, {0x67452301, 0xefcdab89}, {0x54761032, 0xdcfe98ba}, {0x45670123, 0xcdef89ab}, {0x32107654, 0xba98fedc}, {0x23016745, 0xab89efcd}, {0x10325476, 0x98badcfe}, {0x1234567, 0x89abcdef}, @@ -593,58 +615,34 @@ Value *BuilderImpl::CreateSubgroupClusteredReduction(GroupArithOp groupArithOp, groupArithOp, result, createDppUpdate(identity, result, DppCtrl::DppRowMirror, 0xF, 0xF, true)), result); - if (supportPermLaneDpp()) { - // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). - result = - CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation( - groupArithOp, result, createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false)), - result); - - if (supportPermLane64Dpp()) { - result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, createPermLane64(result)), result); - } else { - Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); - Value *const broadcast63 = CreateSubgroupBroadcast(result, getInt32(63), instName); - - // Combine broadcast from the 31st and 63rd for the final result. 
- result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, broadcast31, broadcast63), result); - } - } else { - // Use a row broadcast to move the 15th element in each cluster of 16 to the next cluster. The row mask is - // set to 0xa (0b1010) so that only the 2nd and 4th clusters of 16 perform the calculation. - result = CreateSelect( - CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast15, 0xA, 0xF, true)), - result); - - // Use a row broadcast to move the 31st element from the lower cluster of 32 to the upper cluster. The row - // mask is set to 0x8 (0b1000) so that only the upper cluster of 32 perform the calculation. - result = CreateSelect( - CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast31, 0x8, 0xF, true)), - result); + // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). + result = + CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), + createGroupArithmeticOperation( + groupArithOp, result, createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false)), + result); + if (supportPermLane64Dpp()) { + result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), + createGroupArithmeticOperation(groupArithOp, result, createPermLane64(result)), result); + } else { Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); Value *const broadcast63 = CreateSubgroupBroadcast(result, getInt32(63), instName); - // If the cluster size is 64 we always read the value from the last invocation in the subgroup. 
- result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), broadcast63, result); - - Value *const laneIdLessThan32 = CreateICmpULT(CreateSubgroupMbcnt(getInt64(UINT64_MAX), ""), getInt32(32)); - - // If the cluster size is 32 we need to check where our invocation is in the subgroup, and conditionally use - // invocation 31 or 63's value. - result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(32)), - CreateSelect(laneIdLessThan32, broadcast31, broadcast63), result); + // Combine broadcast from the 31st and 63rd for the final result. + result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), + createGroupArithmeticOperation(groupArithOp, broadcast31, broadcast63), result); } // Finish the WWM section by calling the intrinsic. - return createWwm(result); + result = createWwm(result); + + // If required, force inputs of the operation to be computed in WQM. + if (m_shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsRequireHelperLanes) + result = createWqm(result); + + return result; } // ===================================================================================================================== @@ -699,44 +697,32 @@ Value *BuilderImpl::CreateSubgroupClusteredInclusive(GroupArithOp groupArithOp, groupArithOp, result, createDppUpdate(identity, result, DppCtrl::DppRowSr8, 0xF, 0xC, 0)), result); - if (supportPermLaneDpp()) { - Value *const threadMask = createThreadMask(); + Value *const threadMask = createThreadMask(); - Value *const maskedPermLane = - createThreadMaskedSelect(threadMask, 0xFFFF0000FFFF0000, - createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false), identity); + Value *const maskedPermLane = createThreadMaskedSelect( + threadMask, 0xFFFF0000FFFF0000, createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false), identity); - // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). 
- result = CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation(groupArithOp, result, maskedPermLane), result); + // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). + result = CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), + createGroupArithmeticOperation(groupArithOp, result, maskedPermLane), result); - Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); + Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); - Value *const maskedBroadcast = createThreadMaskedSelect(threadMask, 0xFFFFFFFF00000000, broadcast31, identity); + Value *const maskedBroadcast = createThreadMaskedSelect(threadMask, 0xFFFFFFFF00000000, broadcast31, identity); - // Combine broadcast of 31 with the top two rows only. - result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, maskedBroadcast), result); - } else { - // The DPP operation has a row mask of 0xa (0b1010) so only the 2nd and 4th clusters of 16 perform the - // operation. - result = CreateSelect( - CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast15, 0xA, 0xF, true)), - result); - - // The DPP operation has a row mask of 0xc (0b1100) so only the 3rd and 4th clusters of 16 perform the - // operation. - result = CreateSelect( - CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast31, 0xC, 0xF, true)), - result); - } + // Combine broadcast of 31 with the top two rows only. + result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), + createGroupArithmeticOperation(groupArithOp, result, maskedBroadcast), result); // Finish the WWM section by calling the intrinsic. 
- return createWwm(result); + result = createWwm(result); + + // If required, force inputs of the operation to be computed in WQM. + if (m_shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsRequireHelperLanes) + result = createWqm(result); + + return result; } // ===================================================================================================================== @@ -765,33 +751,27 @@ Value *BuilderImpl::CreateSubgroupClusteredExclusive(GroupArithOp groupArithOp, Value *shiftRight = nullptr; - if (supportPermLaneDpp()) { - Value *const threadMask = createThreadMask(); + Value *const threadMask = createThreadMask(); - // Shift right within each row: - // 0b0110,0101,0100,0011,0010,0001,0000,1111 = 0x6543210F - // 0b1110,1101,1100,1011,1010,1001,1000,0111 = 0xEDCBA987 - shiftRight = createPermLane16(setInactive, setInactive, 0x6543210F, 0xEDCBA987, true, false); + // Shift right within each row: + // 0b0110,0101,0100,0011,0010,0001,0000,1111 = 0x6543210F + // 0b1110,1101,1100,1011,1010,1001,1000,0111 = 0xEDCBA987 + shiftRight = createPermLane16(setInactive, setInactive, 0x6543210F, 0xEDCBA987, true, false); - // Only needed for wave size 64. - if (getShaderWaveSize() == 64) { - // Need to write the value from the 16th invocation into the 48th. - shiftRight = CreateSubgroupWriteInvocation(shiftRight, CreateSubgroupBroadcast(shiftRight, getInt32(16), ""), - getInt32(48), ""); - } + // Only needed for wave size 64. + if (getShaderWaveSize() == 64) { + // Need to write the value from the 16th invocation into the 48th. 
+ shiftRight = CreateSubgroupWriteInvocation(shiftRight, CreateSubgroupBroadcast(shiftRight, getInt32(16), ""), + getInt32(48), ""); + } - shiftRight = CreateSubgroupWriteInvocation(shiftRight, identity, getInt32(16), ""); + shiftRight = CreateSubgroupWriteInvocation(shiftRight, identity, getInt32(16), ""); - // Exchange first column value cross rows(row 1<--> row 0, row 3<-->row2) - // Only first column value from each row join permlanex - shiftRight = - createThreadMaskedSelect(threadMask, 0x0001000100010001, - createPermLaneX16(shiftRight, shiftRight, 0, UINT32_MAX, true, false), shiftRight); - } else { - // Shift the whole subgroup right by one, using a DPP update operation. This will ensure that the identity - // value is in the 0th invocation and all other values are shifted up. All rows and banks are active (0xF). - shiftRight = createDppUpdate(identity, setInactive, DppCtrl::DppWfSr1, 0xF, 0xF, 0); - } + // Exchange first column value cross rows(row 1<--> row 0, row 3<-->row2) + // Only first column value from each row join permlanex + shiftRight = + createThreadMaskedSelect(threadMask, 0x0001000100010001, + createPermLaneX16(shiftRight, shiftRight, 0, UINT32_MAX, true, false), shiftRight); // The DPP operation has all rows active and all banks in the rows active (0xF). 
Value *result = CreateSelect( @@ -828,44 +808,30 @@ Value *BuilderImpl::CreateSubgroupClusteredExclusive(GroupArithOp groupArithOp, groupArithOp, result, createDppUpdate(identity, result, DppCtrl::DppRowSr8, 0xF, 0xC, 0)), result); - if (supportPermLaneDpp()) { - Value *const threadMask = createThreadMask(); - - Value *const maskedPermLane = - createThreadMaskedSelect(threadMask, 0xFFFF0000FFFF0000, - createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false), identity); + Value *const maskedPermLane = createThreadMaskedSelect( + threadMask, 0xFFFF0000FFFF0000, createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false), identity); - // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). - result = CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation(groupArithOp, result, maskedPermLane), result); + // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). + result = CreateSelect(CreateICmpUGE(clusterSize, getInt32(32)), + createGroupArithmeticOperation(groupArithOp, result, maskedPermLane), result); - Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); + Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); - Value *const maskedBroadcast = createThreadMaskedSelect(threadMask, 0xFFFFFFFF00000000, broadcast31, identity); + Value *const maskedBroadcast = createThreadMaskedSelect(threadMask, 0xFFFFFFFF00000000, broadcast31, identity); - // Combine broadcast of 31 with the top two rows only. - result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, maskedBroadcast), result); - } else { - // The DPP operation has a row mask of 0xa (0b1010) so only the 2nd and 4th clusters of 16 perform the - // operation. 
- result = CreateSelect( - CreateICmpUGE(clusterSize, getInt32(32)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast15, 0xA, 0xF, true)), - result); - - // The DPP operation has a row mask of 0xc (0b1100) so only the 3rd and 4th clusters of 16 perform the - // operation. - result = CreateSelect( - CreateICmpEQ(clusterSize, getInt32(64)), - createGroupArithmeticOperation(groupArithOp, result, - createDppUpdate(identity, result, DppCtrl::DppRowBcast31, 0xC, 0xF, true)), - result); - } + // Combine broadcast of 31 with the top two rows only. + result = CreateSelect(CreateICmpEQ(clusterSize, getInt32(64)), + createGroupArithmeticOperation(groupArithOp, result, maskedBroadcast), result); // Finish the WWM section by calling the intrinsic. - return createWwm(result); + result = createWwm(result); + + // If required, force inputs of the operation to be computed in WQM. + if (m_shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsRequireHelperLanes) + result = createWqm(result); + + return result; } // ===================================================================================================================== @@ -1425,15 +1391,15 @@ Value *BuilderImpl::createThreadMaskedSelect(Value *const threadMask, uint64_t a // Do group ballot, turning a per-lane boolean value (in a VGPR) into a subgroup-wide shared SGPR. // // @param value : The value to contribute to the SGPR, must be an boolean type. -Value *BuilderImpl::createGroupBallot(Value *const value) { +// @param excludeHelperLanes : exclude helper lanes. +Value *BuilderImpl::createGroupBallot(Value *const value, bool excludeHelperLanes) { // Check the type is definitely an boolean. assert(value->getType()->isIntegerTy(1)); Value *result = value; // For waveOpsExcludeHelperLanes mode, we need mask away the helperlane. 
- const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - if (m_shaderStage == ShaderStage::Fragment && fragmentMode.waveOpsExcludeHelperLanes) { + if (excludeHelperLanes) { auto isLive = CreateIntrinsic(Intrinsic::amdgcn_live_mask, {}, {}, nullptr, {}); result = CreateAnd(isLive, result); } @@ -1448,6 +1414,18 @@ Value *BuilderImpl::createGroupBallot(Value *const value) { return result; } +// ===================================================================================================================== +// Do group ballot, turning a per-lane boolean value (in a VGPR) into a subgroup-wide shared SGPR. +// +// @param value : The value to contribute to the SGPR, must be an boolean type. +Value *BuilderImpl::createGroupBallot(Value *const value) { + // For waveOpsExcludeHelperLanes mode, we need mask away the helperlane. + bool excludeHelperLanes = false; + if (m_shaderStage == ShaderStage::Fragment) + excludeHelperLanes = m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes; + return createGroupBallot(value, excludeHelperLanes); +} + // ===================================================================================================================== // Search the MSB index of the mask, not handle zero. 
// diff --git a/lgc/builder/YCbCrAddressHandler.cpp b/lgc/builder/YCbCrAddressHandler.cpp index f8636534cc..3674048e1f 100644 --- a/lgc/builder/YCbCrAddressHandler.cpp +++ b/lgc/builder/YCbCrAddressHandler.cpp @@ -29,7 +29,7 @@ *********************************************************************************************************************** */ #include "YCbCrAddressHandler.h" -#include "chip/gfx9/gfx9_plus_merged_enum.h" +#include "lgc/state/AbiMetadata.h" #include "lgc/util/GfxRegHandler.h" #include "lgc/util/Internal.h" #include "llvm/IR/Intrinsics.h" @@ -64,8 +64,7 @@ void YCbCrAddressHandler::genBaseAddress(unsigned planeCount) { } case 10: { // Judge if current swizzle mode is SW_64KB_R_X - Value *isSw64KbRxMode = - m_builder->CreateICmpEQ(m_swizzleMode, m_builder->getInt32(Pal::Gfx9::Chip::SWIZZLE_MODE_ENUM::SW_64KB_R_X)); + Value *isSw64KbRxMode = m_builder->CreateICmpEQ(m_swizzleMode, m_builder->getInt32(SWIZZLE_MODE_ENUM::SW_64KB_R_X)); const unsigned pipesLog2 = 3; const unsigned columnBits = 2; diff --git a/lgc/disassembler/Disassembler.cpp b/lgc/disassembler/Disassembler.cpp index d506ca9745..4c711565c5 100644 --- a/lgc/disassembler/Disassembler.cpp +++ b/lgc/disassembler/Disassembler.cpp @@ -44,6 +44,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/StringSaver.h" @@ -143,6 +144,56 @@ class ObjDisassembler { } // anonymous namespace +// ===================================================================================================================== +// Disassemble an archive of ELFs. We put the disassembled code into a new archive with same member +// names with ".S" suffix. 
+static void disassembleArchive(MemoryBufferRef data, raw_ostream &ostream) { + Error err = Error::success(); + SmallVector disassembledMembers; + SmallVector> strBuffers; + object::Archive archive(data, err); + if (!err) { + for (object::Archive::Child child : archive.children(err)) { + if (err) + break; + Expected name = child.getName(); + if (err = name.takeError()) + break; + Expected contents = child.getMemoryBufferRef(); + if (err = contents.takeError()) + break; + strBuffers.resize(strBuffers.size() + 2); + SmallString<0> &nameBuffer = strBuffers[strBuffers.size() - 2]; + SmallString<0> &disBuffer = strBuffers.back(); + nameBuffer = *name; + nameBuffer += ".S"; + raw_svector_ostream disasmStream(disBuffer); + disasmStream << "// Member " << *name << ":\n"; + ObjDisassembler::disassembleObject(*contents, disasmStream); + disassembledMembers.emplace_back(MemoryBufferRef(disBuffer, nameBuffer)); + } + } + + if (!err) { +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 472105 + Expected> newArchive = + writeArchiveToBuffer(disassembledMembers, SymtabWritingMode::NoSymtab, object::Archive::Kind::K_GNU, + /*Deterministic=*/true, /*Thin=*/false); +#else + Expected> newArchive = + writeArchiveToBuffer(disassembledMembers, /*WriteSymtab=*/false, object::Archive::Kind::K_GNU, + /*Deterministic=*/true, /*Thin=*/false); +#endif + if (!newArchive) + err = newArchive.takeError(); + else + ostream << (*newArchive)->getBuffer(); + } + + if (err) + report_fatal_error(std::move(err)); +} + // ===================================================================================================================== // Disassemble an ELF object into ostream. Does report_fatal_error on error. // @@ -154,7 +205,13 @@ void lgc::disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { InitializeAllTargetMCs(); InitializeAllDisassemblers(); - // Do the disassembly. + if (data.getBuffer().starts_with("!\n")) { + // Disassemble archive of ELFs. 
+ disassembleArchive(data, ostream); + return; + } + + // Attempt to disassemble ELF. ObjDisassembler::disassembleObject(data, ostream); } diff --git a/lgc/elfLinker/ColorExportShader.cpp b/lgc/elfLinker/ColorExportShader.cpp index 05069d2ee0..c2a8ca7a1b 100644 --- a/lgc/elfLinker/ColorExportShader.cpp +++ b/lgc/elfLinker/ColorExportShader.cpp @@ -48,16 +48,10 @@ ColorExportShader::ColorExportShader(PipelineState *pipelineState, ArrayRefgetOptions().enableColorExportShader) { PalMetadata *metadata = pipelineState->getPalMetadata(); - if (pipelineState->useRegisterFieldFormat()) { - auto dbShaderControl = metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl] - .getMap(true); - m_killEnabled = dbShaderControl[Util::Abi::DbShaderControlMetadataKey::KillEnable].getBool(); - } else { - DB_SHADER_CONTROL shaderControl = {}; - shaderControl.u32All = metadata->getRegister(mmDB_SHADER_CONTROL); - m_killEnabled = shaderControl.bits.KILL_ENABLE; - } + auto dbShaderControl = metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl] + .getMap(true); + m_killEnabled = dbShaderControl[Util::Abi::DbShaderControlMetadataKey::KillEnable].getBool(); } m_key = FragColorExport::computeKey(exports, pipelineState); } @@ -139,7 +133,7 @@ Module *ColorExportShader::generate() { ++lastIndex; } - PalMetadata palMetadata{m_pipelineState, m_pipelineState->useRegisterFieldFormat()}; + PalMetadata palMetadata{m_pipelineState}; Value *dynamicIsDualSource = colorExportFunc->getArg(lastIndex); fragColorExport.generateExportInstructions(m_exports, values, m_killEnabled, &palMetadata, builder, @@ -210,10 +204,8 @@ Function *ColorExportShader::createColorExportFunc() { AttrBuilder attribBuilder(func->getContext()); attribBuilder.addAttribute("InitialPSInputAddr", std::to_string(0xFFFFFFFF)); - if 
(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) { - const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Fragment); - attribBuilder.addAttribute("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size - } + const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Fragment); + attribBuilder.addAttribute("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size func->addFnAttrs(attribBuilder); return func; diff --git a/lgc/elfLinker/ElfLinker.cpp b/lgc/elfLinker/ElfLinker.cpp index 57de76edcb..7fdda5b912 100644 --- a/lgc/elfLinker/ElfLinker.cpp +++ b/lgc/elfLinker/ElfLinker.cpp @@ -872,9 +872,8 @@ void OutputSection::write(raw_pwrite_stream &outStream, ELF::Elf64_Shdr *shdr) { const char *padding = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; const char *endPadding = nullptr; if (shdr->sh_flags & ELF::SHF_EXECINSTR) { - padding = "\0\0\x80\xBF\0\0\x80\xBF\0\0\x80\xBF\0\0\x80\xBF"; // s_nop - if (m_linker->getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 10) - endPadding = "\0\0\x9F\xBF\0\0\x9F\xBF\0\0\x9F\xBF\0\0\x9F\xBF"; // s_code_end + padding = "\0\0\x80\xBF\0\0\x80\xBF\0\0\x80\xBF\0\0\x80\xBF"; // s_nop + endPadding = "\0\0\x9F\xBF\0\0\x9F\xBF\0\0\x9F\xBF\0\0\x9F\xBF"; // s_code_end } // Output the contributions from the input sections. 
diff --git a/lgc/imported/.clang-format b/lgc/imported/.clang-format deleted file mode 100644 index 9d159247d5..0000000000 --- a/lgc/imported/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -DisableFormat: true -SortIncludes: false diff --git a/lgc/imported/chip/gfx9/gfx9_plus_merged_enum.h b/lgc/imported/chip/gfx9/gfx9_plus_merged_enum.h deleted file mode 100644 index ebd7e0a7ce..0000000000 --- a/lgc/imported/chip/gfx9/gfx9_plus_merged_enum.h +++ /dev/null @@ -1,21042 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ - -#pragma once - -namespace Pal -{ -namespace Gfx9 -{ -inline namespace Chip -{ - -typedef enum BinEventCntl { - BINNER_BREAK_BATCH = 0x00000000, - BINNER_PIPELINE = 0x00000001, - BINNER_DROP = 0x00000002, - BINNER_DROP_ASSERT__GFX09 = 0x00000003, - BINNER_PIPELINE_BREAK__GFX10PLUS = 0x00000003, -} BinEventCntl; - -typedef enum BinMapMode { - BIN_MAP_MODE_NONE = 0x00000000, - BIN_MAP_MODE_RTA_INDEX = 0x00000001, - BIN_MAP_MODE_POPS__GFX10COREPLUS = 0x00000002, -} BinMapMode; - -typedef enum BinningMode { - BINNING_ALLOWED = 0x00000000, - FORCE_BINNING_ON = 0x00000001, - DISABLE_BINNING_USE_NEW_SC__GFX09_10 = 0x00000002, - DISABLE_BINNING_USE_LEGACY_SC__GFX09_10 = 0x00000003, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - BINNING_ONE_PRIM_PER_BATCH__GFX11 = 0x00000002, - BINNING_DISABLED__GFX11 = 0x00000003, -#endif -} BinningMode; - -typedef enum BinSizeExtend { - BIN_SIZE_32_PIXELS = 0x00000000, - BIN_SIZE_64_PIXELS = 0x00000001, - BIN_SIZE_128_PIXELS = 0x00000002, - BIN_SIZE_256_PIXELS = 0x00000003, - BIN_SIZE_512_PIXELS = 0x00000004, -} BinSizeExtend; - -typedef enum BlendOp { - BLEND_ZERO = 0x00000000, - BLEND_ONE = 0x00000001, - BLEND_SRC_COLOR = 0x00000002, - BLEND_ONE_MINUS_SRC_COLOR = 0x00000003, - BLEND_SRC_ALPHA = 0x00000004, - BLEND_ONE_MINUS_SRC_ALPHA = 0x00000005, - BLEND_DST_ALPHA = 0x00000006, - BLEND_ONE_MINUS_DST_ALPHA = 0x00000007, - BLEND_DST_COLOR = 0x00000008, - BLEND_ONE_MINUS_DST_COLOR = 0x00000009, - BLEND_SRC_ALPHA_SATURATE = 0x0000000a, - BLEND_BOTH_SRC_ALPHA__GFX09_10 = 0x0000000b, - BLEND_BOTH_INV_SRC_ALPHA__GFX09_10 = 0x0000000c, - BLEND_CONSTANT_COLOR__GFX09_10 = 0x0000000d, - BLEND_ONE_MINUS_CONSTANT_COLOR__GFX09_10 = 0x0000000e, - BLEND_SRC1_COLOR__GFX09_10 = 0x0000000f, - BLEND_INV_SRC1_COLOR__GFX09_10 = 0x00000010, - BLEND_SRC1_ALPHA__GFX09_10 = 0x00000011, - 
BLEND_INV_SRC1_ALPHA__GFX09_10 = 0x00000012, - BLEND_CONSTANT_ALPHA__GFX09_10 = 0x00000013, - BLEND_ONE_MINUS_CONSTANT_ALPHA__GFX09_10 = 0x00000014, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - BLEND_CONSTANT_COLOR__GFX11 = 0x0000000b, - BLEND_ONE_MINUS_CONSTANT_COLOR__GFX11 = 0x0000000c, - BLEND_SRC1_COLOR__GFX11 = 0x0000000d, - BLEND_INV_SRC1_COLOR__GFX11 = 0x0000000e, - BLEND_SRC1_ALPHA__GFX11 = 0x0000000f, - BLEND_INV_SRC1_ALPHA__GFX11 = 0x00000010, - BLEND_CONSTANT_ALPHA__GFX11 = 0x00000011, - BLEND_ONE_MINUS_CONSTANT_ALPHA__GFX11 = 0x00000012, -#endif -} BlendOp; - -typedef enum BlendOpt { - FORCE_OPT_AUTO = 0x00000000, - FORCE_OPT_DISABLE = 0x00000001, - FORCE_OPT_ENABLE_IF_SRC_A_0 = 0x00000002, - FORCE_OPT_ENABLE_IF_SRC_RGB_0 = 0x00000003, - FORCE_OPT_ENABLE_IF_SRC_ARGB_0 = 0x00000004, - FORCE_OPT_ENABLE_IF_SRC_A_1 = 0x00000005, - FORCE_OPT_ENABLE_IF_SRC_RGB_1 = 0x00000006, - FORCE_OPT_ENABLE_IF_SRC_ARGB_1 = 0x00000007, -} BlendOpt; - -typedef enum BUF_DATA_FORMAT { - BUF_DATA_FORMAT_INVALID = 0x00000000, - BUF_DATA_FORMAT_8 = 0x00000001, - BUF_DATA_FORMAT_16 = 0x00000002, - BUF_DATA_FORMAT_8_8 = 0x00000003, - BUF_DATA_FORMAT_32 = 0x00000004, - BUF_DATA_FORMAT_16_16 = 0x00000005, - BUF_DATA_FORMAT_10_11_11 = 0x00000006, - BUF_DATA_FORMAT_11_11_10 = 0x00000007, - BUF_DATA_FORMAT_10_10_10_2 = 0x00000008, - BUF_DATA_FORMAT_2_10_10_10 = 0x00000009, - BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a, - BUF_DATA_FORMAT_32_32 = 0x0000000b, - BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c, - BUF_DATA_FORMAT_32_32_32 = 0x0000000d, - BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e, - BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f, -} BUF_DATA_FORMAT; - -typedef enum BUF_FMT { - BUF_FMT_INVALID = 0x00000000, - BUF_FMT_8_UNORM = 0x00000001, - BUF_FMT_8_SNORM = 0x00000002, - BUF_FMT_8_USCALED = 0x00000003, - BUF_FMT_8_SSCALED = 0x00000004, - BUF_FMT_8_UINT = 0x00000005, - BUF_FMT_8_SINT = 0x00000006, - BUF_FMT_16_UNORM = 0x00000007, - BUF_FMT_16_SNORM = 0x00000008, - 
BUF_FMT_16_USCALED = 0x00000009, - BUF_FMT_16_SSCALED = 0x0000000a, - BUF_FMT_16_UINT = 0x0000000b, - BUF_FMT_16_SINT = 0x0000000c, - BUF_FMT_16_FLOAT = 0x0000000d, - BUF_FMT_8_8_UNORM = 0x0000000e, - BUF_FMT_8_8_SNORM = 0x0000000f, - BUF_FMT_8_8_USCALED = 0x00000010, - BUF_FMT_8_8_SSCALED = 0x00000011, - BUF_FMT_8_8_UINT = 0x00000012, - BUF_FMT_8_8_SINT = 0x00000013, - BUF_FMT_32_UINT = 0x00000014, - BUF_FMT_32_SINT = 0x00000015, - BUF_FMT_32_FLOAT = 0x00000016, - BUF_FMT_16_16_UNORM = 0x00000017, - BUF_FMT_16_16_SNORM = 0x00000018, - BUF_FMT_16_16_USCALED = 0x00000019, - BUF_FMT_16_16_SSCALED = 0x0000001a, - BUF_FMT_16_16_UINT = 0x0000001b, - BUF_FMT_16_16_SINT = 0x0000001c, - BUF_FMT_16_16_FLOAT = 0x0000001d, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - BUF_FMT_10_11_11_FLOAT__GFX104PLUS = 0x0000001e, - BUF_FMT_11_11_10_FLOAT__GFX104PLUS = 0x0000001f, - BUF_FMT_10_10_10_2_UNORM__GFX104PLUS = 0x00000020, - BUF_FMT_10_10_10_2_SNORM__GFX104PLUS = 0x00000021, - BUF_FMT_10_10_10_2_UINT__GFX104PLUS = 0x00000022, - BUF_FMT_10_10_10_2_SINT__GFX104PLUS = 0x00000023, - BUF_FMT_2_10_10_10_UNORM__GFX104PLUS = 0x00000024, - BUF_FMT_2_10_10_10_SNORM__GFX104PLUS = 0x00000025, - BUF_FMT_2_10_10_10_USCALED__GFX104PLUS = 0x00000026, - BUF_FMT_2_10_10_10_SSCALED__GFX104PLUS = 0x00000027, - BUF_FMT_2_10_10_10_UINT__GFX104PLUS = 0x00000028, - BUF_FMT_2_10_10_10_SINT__GFX104PLUS = 0x00000029, - BUF_FMT_8_8_8_8_UNORM__GFX104PLUS = 0x0000002a, - BUF_FMT_8_8_8_8_SNORM__GFX104PLUS = 0x0000002b, - BUF_FMT_8_8_8_8_USCALED__GFX104PLUS = 0x0000002c, - BUF_FMT_8_8_8_8_SSCALED__GFX104PLUS = 0x0000002d, - BUF_FMT_8_8_8_8_UINT__GFX104PLUS = 0x0000002e, - BUF_FMT_8_8_8_8_SINT__GFX104PLUS = 0x0000002f, - BUF_FMT_32_32_UINT__GFX104PLUS = 0x00000030, - BUF_FMT_32_32_SINT__GFX104PLUS = 0x00000031, - BUF_FMT_32_32_FLOAT__GFX104PLUS = 0x00000032, - BUF_FMT_16_16_16_16_UNORM__GFX104PLUS = 0x00000033, - BUF_FMT_16_16_16_16_SNORM__GFX104PLUS = 0x00000034, - 
BUF_FMT_16_16_16_16_USCALED__GFX104PLUS = 0x00000035, - BUF_FMT_16_16_16_16_SSCALED__GFX104PLUS = 0x00000036, - BUF_FMT_16_16_16_16_UINT__GFX104PLUS = 0x00000037, - BUF_FMT_16_16_16_16_SINT__GFX104PLUS = 0x00000038, - BUF_FMT_16_16_16_16_FLOAT__GFX104PLUS = 0x00000039, - BUF_FMT_32_32_32_UINT__GFX104PLUS = 0x0000003a, - BUF_FMT_32_32_32_SINT__GFX104PLUS = 0x0000003b, - BUF_FMT_32_32_32_FLOAT__GFX104PLUS = 0x0000003c, - BUF_FMT_32_32_32_32_UINT__GFX104PLUS = 0x0000003d, - BUF_FMT_32_32_32_32_SINT__GFX104PLUS = 0x0000003e, - BUF_FMT_32_32_32_32_FLOAT__GFX104PLUS = 0x0000003f, -#endif - BUF_FMT_10_11_11_UNORM__GFX10CORE = 0x0000001e, - BUF_FMT_10_11_11_SNORM__GFX10CORE = 0x0000001f, - BUF_FMT_10_11_11_USCALED__GFX10CORE = 0x00000020, - BUF_FMT_10_11_11_SSCALED__GFX10CORE = 0x00000021, - BUF_FMT_10_11_11_UINT__GFX10CORE = 0x00000022, - BUF_FMT_10_11_11_SINT__GFX10CORE = 0x00000023, - BUF_FMT_10_11_11_FLOAT__GFX10CORE = 0x00000024, - BUF_FMT_11_11_10_UNORM__GFX10CORE = 0x00000025, - BUF_FMT_11_11_10_SNORM__GFX10CORE = 0x00000026, - BUF_FMT_11_11_10_USCALED__GFX10CORE = 0x00000027, - BUF_FMT_11_11_10_SSCALED__GFX10CORE = 0x00000028, - BUF_FMT_11_11_10_UINT__GFX10CORE = 0x00000029, - BUF_FMT_11_11_10_SINT__GFX10CORE = 0x0000002a, - BUF_FMT_11_11_10_FLOAT__GFX10CORE = 0x0000002b, - BUF_FMT_10_10_10_2_UNORM__GFX10CORE = 0x0000002c, - BUF_FMT_10_10_10_2_SNORM__GFX10CORE = 0x0000002d, - BUF_FMT_10_10_10_2_USCALED__GFX10CORE = 0x0000002e, - BUF_FMT_10_10_10_2_SSCALED__GFX10CORE = 0x0000002f, - BUF_FMT_10_10_10_2_UINT__GFX10CORE = 0x00000030, - BUF_FMT_10_10_10_2_SINT__GFX10CORE = 0x00000031, - BUF_FMT_2_10_10_10_UNORM__GFX10CORE = 0x00000032, - BUF_FMT_2_10_10_10_SNORM__GFX10CORE = 0x00000033, - BUF_FMT_2_10_10_10_USCALED__GFX10CORE = 0x00000034, - BUF_FMT_2_10_10_10_SSCALED__GFX10CORE = 0x00000035, - BUF_FMT_2_10_10_10_UINT__GFX10CORE = 0x00000036, - BUF_FMT_2_10_10_10_SINT__GFX10CORE = 0x00000037, - BUF_FMT_8_8_8_8_UNORM__GFX10CORE = 0x00000038, - 
BUF_FMT_8_8_8_8_SNORM__GFX10CORE = 0x00000039, - BUF_FMT_8_8_8_8_USCALED__GFX10CORE = 0x0000003a, - BUF_FMT_8_8_8_8_SSCALED__GFX10CORE = 0x0000003b, - BUF_FMT_8_8_8_8_UINT__GFX10CORE = 0x0000003c, - BUF_FMT_8_8_8_8_SINT__GFX10CORE = 0x0000003d, - BUF_FMT_32_32_UINT__GFX10CORE = 0x0000003e, - BUF_FMT_32_32_SINT__GFX10CORE = 0x0000003f, - BUF_FMT_32_32_FLOAT__GFX10CORE = 0x00000040, - BUF_FMT_16_16_16_16_UNORM__GFX10CORE = 0x00000041, - BUF_FMT_16_16_16_16_SNORM__GFX10CORE = 0x00000042, - BUF_FMT_16_16_16_16_USCALED__GFX10CORE = 0x00000043, - BUF_FMT_16_16_16_16_SSCALED__GFX10CORE = 0x00000044, - BUF_FMT_16_16_16_16_UINT__GFX10CORE = 0x00000045, - BUF_FMT_16_16_16_16_SINT__GFX10CORE = 0x00000046, - BUF_FMT_16_16_16_16_FLOAT__GFX10CORE = 0x00000047, - BUF_FMT_32_32_32_UINT__GFX10CORE = 0x00000048, - BUF_FMT_32_32_32_SINT__GFX10CORE = 0x00000049, - BUF_FMT_32_32_32_FLOAT__GFX10CORE = 0x0000004a, - BUF_FMT_32_32_32_32_UINT__GFX10CORE = 0x0000004b, - BUF_FMT_32_32_32_32_SINT__GFX10CORE = 0x0000004c, - BUF_FMT_32_32_32_32_FLOAT__GFX10CORE = 0x0000004d, - BUF_FMT_RESERVED_78__GFX10CORE = 0x0000004e, - BUF_FMT_RESERVED_79__GFX10CORE = 0x0000004f, - BUF_FMT_RESERVED_80__GFX10CORE = 0x00000050, - BUF_FMT_RESERVED_81__GFX10CORE = 0x00000051, - BUF_FMT_RESERVED_82__GFX10CORE = 0x00000052, - BUF_FMT_RESERVED_83__GFX10CORE = 0x00000053, - BUF_FMT_RESERVED_84__GFX10CORE = 0x00000054, - BUF_FMT_RESERVED_85__GFX10CORE = 0x00000055, - BUF_FMT_RESERVED_86__GFX10CORE = 0x00000056, - BUF_FMT_RESERVED_87__GFX10CORE = 0x00000057, - BUF_FMT_RESERVED_88__GFX10CORE = 0x00000058, - BUF_FMT_RESERVED_89__GFX10CORE = 0x00000059, - BUF_FMT_RESERVED_90__GFX10CORE = 0x0000005a, - BUF_FMT_RESERVED_91__GFX10CORE = 0x0000005b, - BUF_FMT_RESERVED_92__GFX10CORE = 0x0000005c, - BUF_FMT_RESERVED_93__GFX10CORE = 0x0000005d, - BUF_FMT_RESERVED_94__GFX10CORE = 0x0000005e, - BUF_FMT_RESERVED_95__GFX10CORE = 0x0000005f, - BUF_FMT_RESERVED_96__GFX10CORE = 0x00000060, - BUF_FMT_RESERVED_97__GFX10CORE = 
0x00000061, - BUF_FMT_RESERVED_98__GFX10CORE = 0x00000062, - BUF_FMT_RESERVED_99__GFX10CORE = 0x00000063, - BUF_FMT_RESERVED_100__GFX10CORE = 0x00000064, - BUF_FMT_RESERVED_101__GFX10CORE = 0x00000065, - BUF_FMT_RESERVED_102__GFX10CORE = 0x00000066, - BUF_FMT_RESERVED_103__GFX10CORE = 0x00000067, - BUF_FMT_RESERVED_104__GFX10CORE = 0x00000068, - BUF_FMT_RESERVED_105__GFX10CORE = 0x00000069, - BUF_FMT_RESERVED_106__GFX10CORE = 0x0000006a, - BUF_FMT_RESERVED_107__GFX10CORE = 0x0000006b, - BUF_FMT_RESERVED_108__GFX10CORE = 0x0000006c, - BUF_FMT_RESERVED_109__GFX10CORE = 0x0000006d, - BUF_FMT_RESERVED_110__GFX10CORE = 0x0000006e, - BUF_FMT_RESERVED_111__GFX10CORE = 0x0000006f, - BUF_FMT_RESERVED_112__GFX10CORE = 0x00000070, - BUF_FMT_RESERVED_113__GFX10CORE = 0x00000071, - BUF_FMT_RESERVED_114__GFX10CORE = 0x00000072, - BUF_FMT_RESERVED_115__GFX10CORE = 0x00000073, - BUF_FMT_RESERVED_116__GFX10CORE = 0x00000074, - BUF_FMT_RESERVED_117__GFX10CORE = 0x00000075, - BUF_FMT_RESERVED_118__GFX10CORE = 0x00000076, - BUF_FMT_RESERVED_119__GFX10CORE = 0x00000077, - BUF_FMT_RESERVED_120__GFX10CORE = 0x00000078, - BUF_FMT_RESERVED_121__GFX10CORE = 0x00000079, - BUF_FMT_RESERVED_122__GFX10CORE = 0x0000007a, - BUF_FMT_RESERVED_123__GFX10CORE = 0x0000007b, - BUF_FMT_RESERVED_124__GFX10CORE = 0x0000007c, - BUF_FMT_RESERVED_125__GFX10CORE = 0x0000007d, - BUF_FMT_RESERVED_126__GFX10CORE = 0x0000007e, - BUF_FMT_RESERVED_127__GFX10CORE = 0x0000007f, -} BUF_FMT; - -typedef enum BUF_NUM_FORMAT { - BUF_NUM_FORMAT_UNORM = 0x00000000, - BUF_NUM_FORMAT_SNORM = 0x00000001, - BUF_NUM_FORMAT_USCALED = 0x00000002, - BUF_NUM_FORMAT_SSCALED = 0x00000003, - BUF_NUM_FORMAT_UINT = 0x00000004, - BUF_NUM_FORMAT_SINT = 0x00000005, - BUF_NUM_FORMAT_FLOAT = 0x00000007, - BUF_NUM_FORMAT_SNORM_NZ__GFX10CORE = 0x00000006, - BUF_NUM_FORMAT_RESERVED_6__NOTGFX10 = 0x00000006, -} BUF_NUM_FORMAT; - -typedef enum CBMode { - CB_DISABLE = 0x00000000, - CB_NORMAL = 0x00000001, - CB_ELIMINATE_FAST_CLEAR = 0x00000002, - 
CB_RESOLVE__GFX09_10 = 0x00000003, - CB_DECOMPRESS__GFX09_10 = 0x00000004, - CB_FMASK_DECOMPRESS__GFX09_10 = 0x00000005, - CB_DCC_DECOMPRESS__GFX09_10 = 0x00000006, - CB_RESERVED__GFX10 = 0x00000007, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CB_DCC_DECOMPRESS__GFX11 = 0x00000003, - CB_RESERVED__GFX11 = 0x00000004, -#endif -} CBMode; - -typedef enum CBPerfClearFilterSel { - CB_PERF_CLEAR_FILTER_SEL_NONCLEAR = 0x00000000, - CB_PERF_CLEAR_FILTER_SEL_CLEAR = 0x00000001, -} CBPerfClearFilterSel; - -typedef enum CBPerfOpFilterSel { - CB_PERF_OP_FILTER_SEL_WRITE_ONLY = 0x00000000, - CB_PERF_OP_FILTER_SEL_NEEDS_DESTINATION = 0x00000001, - CB_PERF_OP_FILTER_SEL_RESOLVE = 0x00000002, - CB_PERF_OP_FILTER_SEL_DECOMPRESS = 0x00000003, - CB_PERF_OP_FILTER_SEL_FMASK_DECOMPRESS = 0x00000004, - CB_PERF_OP_FILTER_SEL_ELIMINATE_FAST_CLEAR = 0x00000005, -} CBPerfOpFilterSel; - -typedef enum CBPerfSel { - CB_PERF_SEL_BUSY = 0x00000001, - CB_PERF_SEL_QUAD_KILLED_BY_EXTRA_PIXEL_EXPORT__GFX09 = 0x0000001f, - CB_PERF_SEL_QUAD_KILLED_BY_COLOR_INVALID__GFX09 = 0x00000020, - CB_PERF_SEL_QUAD_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX09 = 0x00000021, - CB_PERF_SEL_QUAD_KILLED_BY_NULL_SAMPLE_MASK__GFX09 = 0x00000022, - CB_PERF_SEL_QUAD_KILLED_BY_DISCARD_PIXEL__GFX09 = 0x00000023, - CB_PERF_SEL_CC_CACHE_HIT__GFX09 = 0x0000005f, - CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_BEFORE_UPDATE__GFX09 = 0x000000a4, - CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000a5, - CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000a6, - CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000a7, - CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000a8, - CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000a9, - CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000aa, - CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_BEFORE_UPDATE__GFX09 = 0x000000ab, - CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_AFTER_UPDATE__GFX09 = 0x000000ac, - 
CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000ad, - CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000ae, - CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000af, - CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000b0, - CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000b1, - CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000b2, - CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_AFTER_UPDATE__GFX09 = 0x000000b3, - CB_PERF_SEL_QUAD_ADDED_1_FRAGMENT__GFX09 = 0x000000b4, - CB_PERF_SEL_QUAD_ADDED_2_FRAGMENTS__GFX09 = 0x000000b5, - CB_PERF_SEL_QUAD_ADDED_3_FRAGMENTS__GFX09 = 0x000000b6, - CB_PERF_SEL_QUAD_ADDED_4_FRAGMENTS__GFX09 = 0x000000b7, - CB_PERF_SEL_QUAD_ADDED_5_FRAGMENTS__GFX09 = 0x000000b8, - CB_PERF_SEL_QUAD_ADDED_6_FRAGMENTS__GFX09 = 0x000000b9, - CB_PERF_SEL_QUAD_ADDED_7_FRAGMENTS__GFX09 = 0x000000ba, - CB_PERF_SEL_QUAD_REMOVED_1_FRAGMENT__GFX09 = 0x000000bb, - CB_PERF_SEL_QUAD_REMOVED_2_FRAGMENTS__GFX09 = 0x000000bc, - CB_PERF_SEL_QUAD_REMOVED_3_FRAGMENTS__GFX09 = 0x000000bd, - CB_PERF_SEL_QUAD_REMOVED_4_FRAGMENTS__GFX09 = 0x000000be, - CB_PERF_SEL_QUAD_REMOVED_5_FRAGMENTS__GFX09 = 0x000000bf, - CB_PERF_SEL_QUAD_REMOVED_6_FRAGMENTS__GFX09 = 0x000000c0, - CB_PERF_SEL_QUAD_REMOVED_7_FRAGMENTS__GFX09 = 0x000000c1, - CB_PERF_SEL_QUAD_READS_FRAGMENT_0__GFX09 = 0x000000c2, - CB_PERF_SEL_QUAD_READS_FRAGMENT_1__GFX09 = 0x000000c3, - CB_PERF_SEL_QUAD_READS_FRAGMENT_2__GFX09 = 0x000000c4, - CB_PERF_SEL_QUAD_READS_FRAGMENT_3__GFX09 = 0x000000c5, - CB_PERF_SEL_QUAD_READS_FRAGMENT_4__GFX09 = 0x000000c6, - CB_PERF_SEL_QUAD_READS_FRAGMENT_5__GFX09 = 0x000000c7, - CB_PERF_SEL_QUAD_READS_FRAGMENT_6__GFX09 = 0x000000c8, - CB_PERF_SEL_QUAD_READS_FRAGMENT_7__GFX09 = 0x000000c9, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_0__GFX09 = 0x000000ca, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_1__GFX09 = 0x000000cb, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_2__GFX09 = 0x000000cc, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_3__GFX09 = 0x000000cd, - 
CB_PERF_SEL_QUAD_WRITES_FRAGMENT_4__GFX09 = 0x000000ce, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_5__GFX09 = 0x000000cf, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_6__GFX09 = 0x000000d0, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_7__GFX09 = 0x000000d1, - CB_PERF_SEL_QUAD_BLEND_OPT_DONT_READ_DST__GFX09 = 0x000000d2, - CB_PERF_SEL_QUAD_BLEND_OPT_BLEND_BYPASS__GFX09 = 0x000000d3, - CB_PERF_SEL_QUAD_BLEND_OPT_DISCARD_PIXELS__GFX09 = 0x000000d4, - CB_PERF_SEL_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED__GFX09 = 0x000000d5, - CB_PERF_SEL_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED__GFX09 = 0x000000d6, - CB_PERF_SEL_QUAD_COULD_HAVE_BEEN_DISCARDED__GFX09 = 0x000000d7, - CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST__GFX09 = 0x000000d8, - CB_PERF_SEL_FC_PF_SLOW_MODE_QUAD_EMPTY_HALF_DROPPED__GFX09 = 0x000000de, - CB_PERF_SEL_FC_DOC_IS_STALLED__GFX09 = 0x000000e4, - CB_PERF_SEL_FC_DCC_KEY_VALUE__CLEAR__GFX09 = 0x0000010a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__4_BLOCKS__2TO1__GFX09 = 0x0000010b, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_BOTH__GFX09 = 0x0000018c, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_LEFT__GFX09 = 0x0000018d, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_RIGHT__GFX09 = 0x0000018e, - CB_PERF_SEL_RBP_SPLIT_MICROTILE__GFX09 = 0x0000018f, - CB_PERF_SEL_RBP_SPLIT_AA_SAMPLE_MASK__GFX09 = 0x00000190, - CB_PERF_SEL_RBP_SPLIT_PARTIAL_TARGET_MASK__GFX09 = 0x00000191, - CB_PERF_SEL_RBP_SPLIT_LINEAR_ADDRESSING__GFX09 = 0x00000192, - CB_PERF_SEL_RBP_SPLIT_AA_NO_FMASK_COMPRESS__GFX09 = 0x00000193, - CB_PERF_SEL_RBP_INSERT_MISSING_LAST_QUAD__GFX09 = 0x00000194, - CB_PERF_SEL_MERGE_PIXELS_WITH_BLEND_ENABLED__GFX09 = 0x000001b1, - CB_PERF_SEL_NONE__GFX09_10 = 0x00000000, - CB_PERF_SEL_CORE_SCLK_VLD__GFX09_10 = 0x00000002, - CB_PERF_SEL_REG_SCLK0_VLD__GFX09_10 = 0x00000003, - CB_PERF_SEL_REG_SCLK1_VLD__GFX09_10 = 0x00000004, - CB_PERF_SEL_DRAWN_QUAD__GFX09_10 = 0x00000005, - CB_PERF_SEL_DRAWN_PIXEL__GFX09_10 = 0x00000006, - CB_PERF_SEL_DRAWN_QUAD_FRAGMENT__GFX09_10 = 0x00000007, - CB_PERF_SEL_DRAWN_TILE__GFX09_10 = 0x00000008, - 
CB_PERF_SEL_DB_CB_TILE_VALID_READY__GFX09_10 = 0x00000009, - CB_PERF_SEL_DB_CB_TILE_VALID_READYB__GFX09_10 = 0x0000000a, - CB_PERF_SEL_DB_CB_TILE_VALIDB_READY__GFX09_10 = 0x0000000b, - CB_PERF_SEL_DB_CB_TILE_VALIDB_READYB__GFX09_10 = 0x0000000c, - CB_PERF_SEL_CM_FC_TILE_VALID_READY__GFX09_10 = 0x0000000d, - CB_PERF_SEL_CM_FC_TILE_VALID_READYB__GFX09_10 = 0x0000000e, - CB_PERF_SEL_CM_FC_TILE_VALIDB_READY__GFX09_10 = 0x0000000f, - CB_PERF_SEL_CM_FC_TILE_VALIDB_READYB__GFX09_10 = 0x00000010, - CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READY__GFX09_10 = 0x00000011, - CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READYB__GFX09_10 = 0x00000012, - CB_PERF_SEL_DB_CB_LQUAD_VALID_READY__GFX09_10 = 0x00000013, - CB_PERF_SEL_DB_CB_LQUAD_VALID_READYB__GFX09_10 = 0x00000014, - CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READY__GFX09_10 = 0x00000015, - CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READYB__GFX09_10 = 0x00000016, - CB_PERF_SEL_LQUAD_NO_TILE__GFX09_10 = 0x00000017, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_R__GFX09_10 = 0x00000018, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_AR__GFX09_10 = 0x00000019, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_GR__GFX09_10 = 0x0000001a, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_ABGR__GFX09_10 = 0x0000001b, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_FP16_ABGR__GFX09_10 = 0x0000001c, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_SIGNED16_ABGR__GFX09_10 = 0x0000001d, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_UNSIGNED16_ABGR__GFX09_10 = 0x0000001e, - CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READY__GFX09_10 = 0x00000024, - CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READYB__GFX09_10 = 0x00000025, - CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READY__GFX09_10 = 0x00000026, - CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READYB__GFX09_10 = 0x00000027, - CB_PERF_SEL_FOP_IN_VALID_READY__GFX09_10 = 0x00000028, - CB_PERF_SEL_FOP_IN_VALID_READYB__GFX09_10 = 0x00000029, - CB_PERF_SEL_FOP_IN_VALIDB_READY__GFX09_10 = 0x0000002a, - CB_PERF_SEL_FOP_IN_VALIDB_READYB__GFX09_10 = 0x0000002b, - CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READY__GFX09_10 = 0x0000002c, - 
CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READYB__GFX09_10 = 0x0000002d, - CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READY__GFX09_10 = 0x0000002e, - CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READYB__GFX09_10 = 0x0000002f, - CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READY__GFX09_10 = 0x00000030, - CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READYB__GFX09_10 = 0x00000031, - CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READY__GFX09_10 = 0x00000032, - CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READYB__GFX09_10 = 0x00000033, - CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READY__GFX09_10 = 0x00000034, - CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READYB__GFX09_10 = 0x00000035, - CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READY__GFX09_10 = 0x00000036, - CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READYB__GFX09_10 = 0x00000037, - CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READY__GFX09_10 = 0x00000038, - CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READYB__GFX09_10 = 0x00000039, - CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READY__GFX09_10 = 0x0000003a, - CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READYB__GFX09_10 = 0x0000003b, - CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READY__GFX09_10 = 0x0000003c, - CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READYB__GFX09_10 = 0x0000003d, - CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READY__GFX09_10 = 0x0000003e, - CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READYB__GFX09_10 = 0x0000003f, - CB_PERF_SEL_CC_BC_CS_FRAG_VALID__GFX09_10 = 0x00000040, - CB_PERF_SEL_CM_CACHE_HIT__GFX09_10 = 0x00000041, - CB_PERF_SEL_CM_CACHE_TAG_MISS__GFX09_10 = 0x00000042, - CB_PERF_SEL_CM_CACHE_SECTOR_MISS__GFX09_10 = 0x00000043, - CB_PERF_SEL_CM_CACHE_REEVICTION_STALL__GFX09_10 = 0x00000044, - CB_PERF_SEL_CM_CACHE_EVICT_NONZERO_INFLIGHT_STALL__GFX09_10 = 0x00000045, - CB_PERF_SEL_CM_CACHE_REPLACE_PENDING_EVICT_STALL__GFX09_10 = 0x00000046, - CB_PERF_SEL_CM_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX09_10 = 0x00000047, - CB_PERF_SEL_CM_CACHE_READ_OUTPUT_STALL__GFX09_10 = 0x00000048, - CB_PERF_SEL_CM_CACHE_WRITE_OUTPUT_STALL__GFX09_10 = 0x00000049, - CB_PERF_SEL_CM_CACHE_ACK_OUTPUT_STALL__GFX09_10 = 0x0000004a, - 
CB_PERF_SEL_CM_CACHE_STALL__GFX09_10 = 0x0000004b, - CB_PERF_SEL_CM_CACHE_FLUSH__GFX09_10 = 0x0000004c, - CB_PERF_SEL_CM_CACHE_TAGS_FLUSHED__GFX09_10 = 0x0000004d, - CB_PERF_SEL_CM_CACHE_SECTORS_FLUSHED__GFX09_10 = 0x0000004e, - CB_PERF_SEL_CM_CACHE_DIRTY_SECTORS_FLUSHED__GFX09_10 = 0x0000004f, - CB_PERF_SEL_FC_CACHE_HIT__GFX09_10 = 0x00000050, - CB_PERF_SEL_FC_CACHE_TAG_MISS__GFX09_10 = 0x00000051, - CB_PERF_SEL_FC_CACHE_SECTOR_MISS__GFX09_10 = 0x00000052, - CB_PERF_SEL_FC_CACHE_REEVICTION_STALL__GFX09_10 = 0x00000053, - CB_PERF_SEL_FC_CACHE_EVICT_NONZERO_INFLIGHT_STALL__GFX09_10 = 0x00000054, - CB_PERF_SEL_FC_CACHE_REPLACE_PENDING_EVICT_STALL__GFX09_10 = 0x00000055, - CB_PERF_SEL_FC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX09_10 = 0x00000056, - CB_PERF_SEL_FC_CACHE_READ_OUTPUT_STALL__GFX09_10 = 0x00000057, - CB_PERF_SEL_FC_CACHE_WRITE_OUTPUT_STALL__GFX09_10 = 0x00000058, - CB_PERF_SEL_FC_CACHE_ACK_OUTPUT_STALL__GFX09_10 = 0x00000059, - CB_PERF_SEL_FC_CACHE_STALL__GFX09_10 = 0x0000005a, - CB_PERF_SEL_FC_CACHE_FLUSH__GFX09_10 = 0x0000005b, - CB_PERF_SEL_FC_CACHE_TAGS_FLUSHED__GFX09_10 = 0x0000005c, - CB_PERF_SEL_FC_CACHE_SECTORS_FLUSHED__GFX09_10 = 0x0000005d, - CB_PERF_SEL_FC_CACHE_DIRTY_SECTORS_FLUSHED__GFX09_10 = 0x0000005e, - CB_PERF_SEL_CC_CACHE_TAG_MISS__GFX09_10 = 0x00000060, - CB_PERF_SEL_CC_CACHE_SECTOR_MISS__GFX09_10 = 0x00000061, - CB_PERF_SEL_CC_CACHE_REEVICTION_STALL__GFX09_10 = 0x00000062, - CB_PERF_SEL_CC_CACHE_EVICT_NONZERO_INFLIGHT_STALL__GFX09_10 = 0x00000063, - CB_PERF_SEL_CC_CACHE_REPLACE_PENDING_EVICT_STALL__GFX09_10 = 0x00000064, - CB_PERF_SEL_CC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX09_10 = 0x00000065, - CB_PERF_SEL_CC_CACHE_READ_OUTPUT_STALL__GFX09_10 = 0x00000066, - CB_PERF_SEL_CC_CACHE_WRITE_OUTPUT_STALL__GFX09_10 = 0x00000067, - CB_PERF_SEL_CC_CACHE_ACK_OUTPUT_STALL__GFX09_10 = 0x00000068, - CB_PERF_SEL_CC_CACHE_STALL__GFX09_10 = 0x00000069, - CB_PERF_SEL_CC_CACHE_FLUSH__GFX09_10 = 0x0000006a, - 
CB_PERF_SEL_CC_CACHE_TAGS_FLUSHED__GFX09_10 = 0x0000006b, - CB_PERF_SEL_CC_CACHE_SECTORS_FLUSHED__GFX09_10 = 0x0000006c, - CB_PERF_SEL_CC_CACHE_DIRTY_SECTORS_FLUSHED__GFX09_10 = 0x0000006d, - CB_PERF_SEL_CC_CACHE_WA_TO_RMW_CONVERSION__GFX09_10 = 0x0000006e, - CB_PERF_SEL_CC_CACHE_READS_SAVED_DUE_TO_DCC__GFX09_10 = 0x0000006f, - CB_PERF_SEL_CB_TAP_WRREQ_VALID_READY__GFX09_10 = 0x00000070, - CB_PERF_SEL_CB_TAP_WRREQ_VALID_READYB__GFX09_10 = 0x00000071, - CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READY__GFX09_10 = 0x00000072, - CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READYB__GFX09_10 = 0x00000073, - CB_PERF_SEL_CM_MC_WRITE_REQUEST__GFX09_10 = 0x00000074, - CB_PERF_SEL_FC_MC_WRITE_REQUEST__GFX09_10 = 0x00000075, - CB_PERF_SEL_CC_MC_WRITE_REQUEST__GFX09_10 = 0x00000076, - CB_PERF_SEL_CM_MC_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000077, - CB_PERF_SEL_FC_MC_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000078, - CB_PERF_SEL_CC_MC_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000079, - CB_PERF_SEL_CB_TAP_RDREQ_VALID_READY__GFX09_10 = 0x0000007a, - CB_PERF_SEL_CB_TAP_RDREQ_VALID_READYB__GFX09_10 = 0x0000007b, - CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READY__GFX09_10 = 0x0000007c, - CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READYB__GFX09_10 = 0x0000007d, - CB_PERF_SEL_CM_MC_READ_REQUEST__GFX09_10 = 0x0000007e, - CB_PERF_SEL_FC_MC_READ_REQUEST__GFX09_10 = 0x0000007f, - CB_PERF_SEL_CC_MC_READ_REQUEST__GFX09_10 = 0x00000080, - CB_PERF_SEL_CM_MC_READ_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000081, - CB_PERF_SEL_FC_MC_READ_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000082, - CB_PERF_SEL_CC_MC_READ_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000083, - CB_PERF_SEL_CM_TQ_FULL__GFX09_10 = 0x00000084, - CB_PERF_SEL_CM_TQ_FIFO_TILE_RESIDENCY_STALL__GFX09_10 = 0x00000085, - CB_PERF_SEL_FC_QUAD_RDLAT_FIFO_FULL__GFX09_10 = 0x00000086, - CB_PERF_SEL_FC_TILE_RDLAT_FIFO_FULL__GFX09_10 = 0x00000087, - CB_PERF_SEL_FC_RDLAT_FIFO_QUAD_RESIDENCY_STALL__GFX09_10 = 0x00000088, - CB_PERF_SEL_FOP_FMASK_RAW_STALL__GFX09_10 = 0x00000089, - 
CB_PERF_SEL_FOP_FMASK_BYPASS_STALL__GFX09_10 = 0x0000008a, - CB_PERF_SEL_CC_SF_FULL__GFX09_10 = 0x0000008b, - CB_PERF_SEL_CC_RB_FULL__GFX09_10 = 0x0000008c, - CB_PERF_SEL_CC_EVENFIFO_QUAD_RESIDENCY_STALL__GFX09_10 = 0x0000008d, - CB_PERF_SEL_CC_ODDFIFO_QUAD_RESIDENCY_STALL__GFX09_10 = 0x0000008e, - CB_PERF_SEL_BLENDER_RAW_HAZARD_STALL__GFX09_10 = 0x0000008f, - CB_PERF_SEL_EVENT__GFX09_10 = 0x00000090, - CB_PERF_SEL_EVENT_CACHE_FLUSH_TS__GFX09_10 = 0x00000091, - CB_PERF_SEL_EVENT_CONTEXT_DONE__GFX09_10 = 0x00000092, - CB_PERF_SEL_EVENT_CACHE_FLUSH__GFX09_10 = 0x00000093, - CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_TS_EVENT__GFX09_10 = 0x00000094, - CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_EVENT__GFX09_10 = 0x00000095, - CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_DATA_TS__GFX09_10 = 0x00000096, - CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_META__GFX09_10 = 0x00000097, - CB_PERF_SEL_CC_SURFACE_SYNC__GFX09_10 = 0x00000098, - CB_PERF_SEL_CMASK_READ_DATA_0xC__GFX09_10 = 0x00000099, - CB_PERF_SEL_CMASK_READ_DATA_0xD__GFX09_10 = 0x0000009a, - CB_PERF_SEL_CMASK_READ_DATA_0xE__GFX09_10 = 0x0000009b, - CB_PERF_SEL_CMASK_READ_DATA_0xF__GFX09_10 = 0x0000009c, - CB_PERF_SEL_CMASK_WRITE_DATA_0xC__GFX09_10 = 0x0000009d, - CB_PERF_SEL_CMASK_WRITE_DATA_0xD__GFX09_10 = 0x0000009e, - CB_PERF_SEL_CMASK_WRITE_DATA_0xE__GFX09_10 = 0x0000009f, - CB_PERF_SEL_CMASK_WRITE_DATA_0xF__GFX09_10 = 0x000000a0, - CB_PERF_SEL_TWO_PROBE_QUAD_FRAGMENT__GFX09_10 = 0x000000a1, - CB_PERF_SEL_EXPORT_32_ABGR_QUAD_FRAGMENT__GFX09_10 = 0x000000a2, - CB_PERF_SEL_DUAL_SOURCE_COLOR_QUAD_FRAGMENT__GFX09_10 = 0x000000a3, - CB_PERF_SEL_DRAWN_BUSY__GFX09_10 = 0x000000d9, - CB_PERF_SEL_TILE_TO_CMR_REGION_BUSY__GFX09_10 = 0x000000da, - CB_PERF_SEL_CMR_TO_FCR_REGION_BUSY__GFX09_10 = 0x000000db, - CB_PERF_SEL_FCR_TO_CCR_REGION_BUSY__GFX09_10 = 0x000000dc, - CB_PERF_SEL_CCR_TO_CCW_REGION_BUSY__GFX09_10 = 0x000000dd, - CB_PERF_SEL_FC_SEQUENCER_CLEAR__GFX09_10 = 0x000000df, - CB_PERF_SEL_FC_SEQUENCER_ELIMINATE_FAST_CLEAR__GFX09_10 = 0x000000e0, 
- CB_PERF_SEL_FC_SEQUENCER_FMASK_DECOMPRESS__GFX09_10 = 0x000000e1, - CB_PERF_SEL_FC_SEQUENCER_FMASK_COMPRESSION_DISABLE__GFX09_10 = 0x000000e2, - CB_PERF_SEL_FC_KEYID_RDLAT_FIFO_FULL__GFX09_10 = 0x000000e3, - CB_PERF_SEL_FC_DOC_MRTS_NOT_COMBINED__GFX09_10 = 0x000000e5, - CB_PERF_SEL_FC_DOC_MRTS_COMBINED__GFX09_10 = 0x000000e6, - CB_PERF_SEL_FC_DOC_QTILE_CAM_MISS__GFX09_10 = 0x000000e7, - CB_PERF_SEL_FC_DOC_QTILE_CAM_HIT__GFX09_10 = 0x000000e8, - CB_PERF_SEL_FC_DOC_CLINE_CAM_MISS__GFX09_10 = 0x000000e9, - CB_PERF_SEL_FC_DOC_CLINE_CAM_HIT__GFX09_10 = 0x000000ea, - CB_PERF_SEL_FC_DOC_QUAD_PTR_FIFO_IS_FULL__GFX09_10 = 0x000000eb, - CB_PERF_SEL_FC_DOC_OVERWROTE_1_SECTOR__GFX09_10 = 0x000000ec, - CB_PERF_SEL_FC_DOC_OVERWROTE_2_SECTORS__GFX09_10 = 0x000000ed, - CB_PERF_SEL_FC_DOC_OVERWROTE_3_SECTORS__GFX09_10 = 0x000000ee, - CB_PERF_SEL_FC_DOC_OVERWROTE_4_SECTORS__GFX09_10 = 0x000000ef, - CB_PERF_SEL_FC_DOC_TOTAL_OVERWRITTEN_SECTORS__GFX09_10 = 0x000000f0, - CB_PERF_SEL_FC_DCC_CACHE_HIT__GFX09_10 = 0x000000f1, - CB_PERF_SEL_FC_DCC_CACHE_TAG_MISS__GFX09_10 = 0x000000f2, - CB_PERF_SEL_FC_DCC_CACHE_SECTOR_MISS__GFX09_10 = 0x000000f3, - CB_PERF_SEL_FC_DCC_CACHE_REEVICTION_STALL__GFX09_10 = 0x000000f4, - CB_PERF_SEL_FC_DCC_CACHE_EVICT_NONZERO_INFLIGHT_STALL__GFX09_10 = 0x000000f5, - CB_PERF_SEL_FC_DCC_CACHE_REPLACE_PENDING_EVICT_STALL__GFX09_10 = 0x000000f6, - CB_PERF_SEL_FC_DCC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX09_10 = 0x000000f7, - CB_PERF_SEL_FC_DCC_CACHE_READ_OUTPUT_STALL__GFX09_10 = 0x000000f8, - CB_PERF_SEL_FC_DCC_CACHE_WRITE_OUTPUT_STALL__GFX09_10 = 0x000000f9, - CB_PERF_SEL_FC_DCC_CACHE_ACK_OUTPUT_STALL__GFX09_10 = 0x000000fa, - CB_PERF_SEL_FC_DCC_CACHE_STALL__GFX09_10 = 0x000000fb, - CB_PERF_SEL_FC_DCC_CACHE_FLUSH__GFX09_10 = 0x000000fc, - CB_PERF_SEL_CC_DCC_BEYOND_TILE_SPLIT__GFX09_10 = 0x00000100, - CB_PERF_SEL_FC_MC_DCC_WRITE_REQUEST__GFX09_10 = 0x00000101, - CB_PERF_SEL_FC_MC_DCC_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000102, - 
CB_PERF_SEL_FC_MC_DCC_READ_REQUEST__GFX09_10 = 0x00000103, - CB_PERF_SEL_FC_MC_DCC_READ_REQUESTS_IN_FLIGHT__GFX09_10 = 0x00000104, - CB_PERF_SEL_CC_DCC_RDREQ_STALL__GFX09_10 = 0x00000105, - CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_IN__GFX09_10 = 0x00000106, - CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_OUT__GFX09_10 = 0x00000107, - CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_IN__GFX09_10 = 0x00000108, - CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_OUT__GFX09_10 = 0x00000109, - CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO1__1BLOCK_2TO2__GFX09_10 = 0x0000010c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x0000010d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__2BLOCKS_2TO1__GFX09_10 = 0x0000010e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__3BLOCKS_2TO1__GFX09_10 = 0x0000010f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__2BLOCKS_2TO2__GFX09_10 = 0x00000110, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__2BLOCKS_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000111, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x00000112, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000113, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__2BLOCKS_2TO1__GFX09_10 = 0x00000114, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__2BLOCKS_2TO1__1BLOCK_2TO2__GFX09_10 = 0x00000115, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__3BLOCKS_2TO2__GFX09_10 = 0x00000116, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__2BLOCKS_2TO2__GFX09_10 = 0x00000117, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x00000118, - CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000119, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO1__GFX09_10 = 0x0000011a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO2__GFX09_10 = 0x0000011b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO3__GFX09_10 = 0x0000011c, - 
CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO4__GFX09_10 = 0x0000011d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO1__GFX09_10 = 0x0000011e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO2__GFX09_10 = 0x0000011f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO3__GFX09_10 = 0x00000120, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO4__GFX09_10 = 0x00000121, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO1__GFX09_10 = 0x00000122, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO2__GFX09_10 = 0x00000123, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO3__GFX09_10 = 0x00000124, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO4__GFX09_10 = 0x00000125, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO1__GFX09_10 = 0x00000126, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO2__GFX09_10 = 0x00000127, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO3__GFX09_10 = 0x00000128, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO1__GFX09_10 = 0x00000129, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO2__GFX09_10 = 0x0000012a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO3__GFX09_10 = 0x0000012b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO4__GFX09_10 = 0x0000012c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO1__GFX09_10 = 0x0000012d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO2__GFX09_10 = 0x0000012e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO3__GFX09_10 = 0x0000012f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO4__GFX09_10 = 0x00000130, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO1__GFX09_10 = 0x00000131, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO2__GFX09_10 = 0x00000132, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO3__GFX09_10 = 0x00000133, - 
CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO4__GFX09_10 = 0x00000134, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO1__GFX09_10 = 0x00000135, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO2__GFX09_10 = 0x00000136, - CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO3__GFX09_10 = 0x00000137, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO1__GFX09_10 = 0x00000138, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO1__GFX09_10 = 0x00000139, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO1__GFX09_10 = 0x0000013a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO1__GFX09_10 = 0x0000013b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO1__GFX09_10 = 0x0000013c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO1__GFX09_10 = 0x0000013d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO1__GFX09_10 = 0x0000013e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO4__1BLOCK_2TO1__GFX09_10 = 0x0000013f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO2__GFX09_10 = 0x00000140, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO2__GFX09_10 = 0x00000141, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO2__GFX09_10 = 0x00000142, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO2__GFX09_10 = 0x00000143, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO2__GFX09_10 = 0x00000144, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO2__GFX09_10 = 0x00000145, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO2__GFX09_10 = 0x00000146, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO1__GFX09_10 = 0x00000147, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO1__GFX09_10 = 0x00000148, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO1__GFX09_10 = 0x00000149, - 
CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__2BLOCKS_2TO1__GFX09_10 = 0x0000014a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO2__GFX09_10 = 0x0000014b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO2__GFX09_10 = 0x0000014c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO2__GFX09_10 = 0x0000014d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x0000014e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x0000014f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x00000150, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO1__1BLOCK_2TO2__GFX09_10 = 0x00000151, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000152, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000153, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000154, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO2__1BLOCK_2TO1__GFX09_10 = 0x00000155, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO1__GFX09_10 = 0x00000156, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO2__GFX09_10 = 0x00000157, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO3__GFX09_10 = 0x00000158, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO4__GFX09_10 = 0x00000159, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO5__GFX09_10 = 0x0000015a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO6__GFX09_10 = 0x0000015b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV0__GFX09_10 = 0x0000015c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV1__GFX09_10 = 0x0000015d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO1__GFX09_10 = 0x0000015e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO2__GFX09_10 = 0x0000015f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO3__GFX09_10 = 0x00000160, - 
CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO4__GFX09_10 = 0x00000161, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO5__GFX09_10 = 0x00000162, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV0__GFX09_10 = 0x00000163, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV1__GFX09_10 = 0x00000164, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO1__GFX09_10 = 0x00000165, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO1__GFX09_10 = 0x00000166, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO1__GFX09_10 = 0x00000167, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO1__GFX09_10 = 0x00000168, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO1__GFX09_10 = 0x00000169, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO6__1BLOCK_2TO1__GFX09_10 = 0x0000016a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO1__GFX09_10 = 0x0000016b, - CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO1__GFX09_10 = 0x0000016c, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO2__GFX09_10 = 0x0000016d, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO2__GFX09_10 = 0x0000016e, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO2__GFX09_10 = 0x0000016f, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO2__GFX09_10 = 0x00000170, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO2__GFX09_10 = 0x00000171, - CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO2__GFX09_10 = 0x00000172, - CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO2__GFX09_10 = 0x00000173, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO1__GFX09_10 = 0x00000174, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO2__GFX09_10 = 0x00000175, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO3__GFX09_10 = 0x00000176, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO4__GFX09_10 = 0x00000177, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO5__GFX09_10 = 0x00000178, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO6__GFX09_10 = 0x00000179, - CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO7__GFX09_10 = 0x0000017a, - 
CB_PERF_SEL_CC_DCC_KEY_VALUE__UNCOMPRESSED__GFX09_10 = 0x0000017b, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_2TO1__GFX09_10 = 0x0000017c, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO1__GFX09_10 = 0x0000017d, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO2__GFX09_10 = 0x0000017e, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO3__GFX09_10 = 0x0000017f, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO1__GFX09_10 = 0x00000180, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO2__GFX09_10 = 0x00000181, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO3__GFX09_10 = 0x00000182, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO4__GFX09_10 = 0x00000183, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO5__GFX09_10 = 0x00000184, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO1__GFX09_10 = 0x00000185, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO2__GFX09_10 = 0x00000186, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO3__GFX09_10 = 0x00000187, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO4__GFX09_10 = 0x00000188, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO5__GFX09_10 = 0x00000189, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO6__GFX09_10 = 0x0000018a, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO7__GFX09_10 = 0x0000018b, - CB_PERF_SEL_NACK_CM_READ__GFX09_10 = 0x00000195, - CB_PERF_SEL_NACK_CM_WRITE__GFX09_10 = 0x00000196, - CB_PERF_SEL_NACK_FC_READ__GFX09_10 = 0x00000197, - CB_PERF_SEL_NACK_FC_WRITE__GFX09_10 = 0x00000198, - CB_PERF_SEL_NACK_DC_READ__GFX09_10 = 0x00000199, - CB_PERF_SEL_NACK_DC_WRITE__GFX09_10 = 0x0000019a, - CB_PERF_SEL_NACK_CC_READ__GFX09_10 = 0x0000019b, - CB_PERF_SEL_NACK_CC_WRITE__GFX09_10 = 0x0000019c, - CB_PERF_SEL_CM_MC_EARLY_WRITE_RETURN__GFX09_10 = 0x0000019d, - CB_PERF_SEL_FC_MC_EARLY_WRITE_RETURN__GFX09_10 = 0x0000019e, - CB_PERF_SEL_DC_MC_EARLY_WRITE_RETURN__GFX09_10 = 0x0000019f, - CB_PERF_SEL_CC_MC_EARLY_WRITE_RETURN__GFX09_10 = 0x000001a0, - CB_PERF_SEL_CM_MC_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x000001a1, - CB_PERF_SEL_FC_MC_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x000001a2, - CB_PERF_SEL_DC_MC_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x000001a3, - 
CB_PERF_SEL_CC_MC_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX09_10 = 0x000001a4, - CB_PERF_SEL_CM_MC_WRITE_ACK64B__GFX09_10 = 0x000001a5, - CB_PERF_SEL_FC_MC_WRITE_ACK64B__GFX09_10 = 0x000001a6, - CB_PERF_SEL_DC_MC_WRITE_ACK64B__GFX09_10 = 0x000001a7, - CB_PERF_SEL_CC_MC_WRITE_ACK64B__GFX09_10 = 0x000001a8, - CB_PERF_SEL_EVENT_BOTTOM_OF_PIPE_TS__GFX09_10 = 0x000001a9, - CB_PERF_SEL_EVENT_FLUSH_AND_INV_DB_DATA_TS__GFX09_10 = 0x000001aa, - CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_PIXEL_DATA__GFX09_10 = 0x000001ab, - CB_PERF_SEL_DB_CB_TILE_TILENOTEVENT__GFX09_10 = 0x000001ac, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32BPP_8PIX__GFX09_10 = 0x000001ad, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_16_16_UNSIGNED_8PIX__GFX09_10 = 0x000001ae, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_16_16_SIGNED_8PIX__GFX09_10 = 0x000001af, - CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_16_16_FLOAT_8PIX__GFX09_10 = 0x000001b0, - CB_PERF_SEL_DB_CB_CONTEXT_DONE__GFX09_10 = 0x000001b2, - CB_PERF_SEL_DB_CB_EOP_DONE__GFX09_10 = 0x000001b3, - CB_PERF_SEL_CC_MC_WRITE_REQUEST_PARTIAL__GFX09_10 = 0x000001b4, - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10 = 0x000001b5, - CB_PERF_SEL_FC_DCC_CACHE_SECTORS_FLUSHED__GFX10 = 0x000000fd, - CB_PERF_SEL_FC_DCC_CACHE_DIRTY_SECTORS_FLUSHED__GFX10 = 0x000000fe, - CB_PERF_SEL_FC_DCC_CACHE_TAGS_FLUSHED__GFX10 = 0x000000ff, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_REG__GFX10 = 0x000001b6, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_SINGLE__GFX10 = 0x000001b7, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC00__GFX10 = 0x000001b8, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC01__GFX10 = 0x000001b9, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC10__GFX10 = 0x000001ba, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC11__GFX10 = 0x000001bb, - CB_PERF_SEL_QUAD_KILLED_BY_EXTRA_PIXEL_EXPORT__GFX10CORE = 0x0000001f, - CB_PERF_SEL_QUAD_KILLED_BY_COLOR_INVALID__GFX10CORE = 0x00000020, - CB_PERF_SEL_QUAD_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX10CORE = 0x00000021, - 
CB_PERF_SEL_QUAD_KILLED_BY_NULL_SAMPLE_MASK__GFX10CORE = 0x00000022, - CB_PERF_SEL_QUAD_KILLED_BY_DISCARD_PIXEL__GFX10CORE = 0x00000023, - CB_PERF_SEL_CC_CACHE_HIT__GFX10CORE = 0x0000005f, - CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_BEFORE_UPDATE__GFX10CORE = 0x000000a4, - CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000a5, - CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000a6, - CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000a7, - CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000a8, - CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000a9, - CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000aa, - CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_BEFORE_UPDATE__GFX10CORE = 0x000000ab, - CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_AFTER_UPDATE__GFX10CORE = 0x000000ac, - CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000ad, - CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000ae, - CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000af, - CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000b0, - CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000b1, - CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000b2, - CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_AFTER_UPDATE__GFX10CORE = 0x000000b3, - CB_PERF_SEL_QUAD_ADDED_1_FRAGMENT__GFX10CORE = 0x000000b4, - CB_PERF_SEL_QUAD_ADDED_2_FRAGMENTS__GFX10CORE = 0x000000b5, - CB_PERF_SEL_QUAD_ADDED_3_FRAGMENTS__GFX10CORE = 0x000000b6, - CB_PERF_SEL_QUAD_ADDED_4_FRAGMENTS__GFX10CORE = 0x000000b7, - CB_PERF_SEL_QUAD_ADDED_5_FRAGMENTS__GFX10CORE = 0x000000b8, - CB_PERF_SEL_QUAD_ADDED_6_FRAGMENTS__GFX10CORE = 0x000000b9, - CB_PERF_SEL_QUAD_ADDED_7_FRAGMENTS__GFX10CORE = 0x000000ba, - CB_PERF_SEL_QUAD_REMOVED_1_FRAGMENT__GFX10CORE = 0x000000bb, - CB_PERF_SEL_QUAD_REMOVED_2_FRAGMENTS__GFX10CORE = 0x000000bc, - CB_PERF_SEL_QUAD_REMOVED_3_FRAGMENTS__GFX10CORE = 0x000000bd, - 
CB_PERF_SEL_QUAD_REMOVED_4_FRAGMENTS__GFX10CORE = 0x000000be, - CB_PERF_SEL_QUAD_REMOVED_5_FRAGMENTS__GFX10CORE = 0x000000bf, - CB_PERF_SEL_QUAD_REMOVED_6_FRAGMENTS__GFX10CORE = 0x000000c0, - CB_PERF_SEL_QUAD_REMOVED_7_FRAGMENTS__GFX10CORE = 0x000000c1, - CB_PERF_SEL_QUAD_READS_FRAGMENT_0__GFX10CORE = 0x000000c2, - CB_PERF_SEL_QUAD_READS_FRAGMENT_1__GFX10CORE = 0x000000c3, - CB_PERF_SEL_QUAD_READS_FRAGMENT_2__GFX10CORE = 0x000000c4, - CB_PERF_SEL_QUAD_READS_FRAGMENT_3__GFX10CORE = 0x000000c5, - CB_PERF_SEL_QUAD_READS_FRAGMENT_4__GFX10CORE = 0x000000c6, - CB_PERF_SEL_QUAD_READS_FRAGMENT_5__GFX10CORE = 0x000000c7, - CB_PERF_SEL_QUAD_READS_FRAGMENT_6__GFX10CORE = 0x000000c8, - CB_PERF_SEL_QUAD_READS_FRAGMENT_7__GFX10CORE = 0x000000c9, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_0__GFX10CORE = 0x000000ca, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_1__GFX10CORE = 0x000000cb, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_2__GFX10CORE = 0x000000cc, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_3__GFX10CORE = 0x000000cd, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_4__GFX10CORE = 0x000000ce, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_5__GFX10CORE = 0x000000cf, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_6__GFX10CORE = 0x000000d0, - CB_PERF_SEL_QUAD_WRITES_FRAGMENT_7__GFX10CORE = 0x000000d1, - CB_PERF_SEL_QUAD_BLEND_OPT_DONT_READ_DST__GFX10CORE = 0x000000d2, - CB_PERF_SEL_QUAD_BLEND_OPT_BLEND_BYPASS__GFX10CORE = 0x000000d3, - CB_PERF_SEL_QUAD_BLEND_OPT_DISCARD_PIXELS__GFX10CORE = 0x000000d4, - CB_PERF_SEL_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED__GFX10CORE = 0x000000d5, - CB_PERF_SEL_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED__GFX10CORE = 0x000000d6, - CB_PERF_SEL_QUAD_COULD_HAVE_BEEN_DISCARDED__GFX10CORE = 0x000000d7, - CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST__GFX10CORE = 0x000000d8, - CB_PERF_SEL_FC_PF_SLOW_MODE_QUAD_EMPTY_HALF_DROPPED__GFX10CORE = 0x000000de, - CB_PERF_SEL_FC_DOC_IS_STALLED__GFX10CORE = 0x000000e4, - CB_PERF_SEL_FC_DCC_KEY_VALUE__CLEAR__GFX10CORE = 0x0000010a, - CB_PERF_SEL_CC_DCC_KEY_VALUE__4_BLOCKS__2TO1__GFX10CORE = 
0x0000010b, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_BOTH__GFX10CORE = 0x0000018c, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_LEFT__GFX10CORE = 0x0000018d, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_RIGHT__GFX10CORE = 0x0000018e, - CB_PERF_SEL_RBP_SPLIT_MICROTILE__GFX10CORE = 0x0000018f, - CB_PERF_SEL_RBP_SPLIT_AA_SAMPLE_MASK__GFX10CORE = 0x00000190, - CB_PERF_SEL_RBP_SPLIT_PARTIAL_TARGET_MASK__GFX10CORE = 0x00000191, - CB_PERF_SEL_RBP_SPLIT_LINEAR_ADDRESSING__GFX10CORE = 0x00000192, - CB_PERF_SEL_RBP_SPLIT_AA_NO_FMASK_COMPRESS__GFX10CORE = 0x00000193, - CB_PERF_SEL_RBP_INSERT_MISSING_LAST_QUAD__GFX10CORE = 0x00000194, - CB_PERF_SEL_MERGE_PIXELS_WITH_BLEND_ENABLED__GFX10CORE = 0x000001b1, - CB_PERF_SEL_TQ_STUTTER_STALL__GFX10CORE = 0x000001bc, - CB_PERF_SEL_FC_TILE_STUTTER_STALL__GFX10CORE = 0x000001bd, - CB_PERF_SEL_FC_QUAD_STUTTER_STALL__GFX10CORE = 0x000001be, - CB_PERF_SEL_FC_KEYID_STUTTER_STALL__GFX10CORE = 0x000001bf, - CB_PERF_SEL_CC_EVENFIFO_STUTTER_STALL__GFX10CORE = 0x000001c0, - CB_PERF_SEL_CC_ODDFIFO_STUTTER_STALL__GFX10CORE = 0x000001c1, - CB_PERF_SEL_CC_CACHE_256BS_SAVED_DUE_TO_QSB__GFX10CORE = 0x000001c2, - CB_PERF_SEL_FC_CACHE_FMASK_NO_FETCH__GFX10CORE = 0x000001c3, - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE = 0x000001c4, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CB_PERF_SEL_DRAWN_BUSY__GFX11 = 0x00000002, - CB_PERF_SEL_DRAWN_PIXEL__GFX11 = 0x00000003, - CB_PERF_SEL_DRAWN_QUAD__GFX11 = 0x00000004, - CB_PERF_SEL_DRAWN_QUAD_FRAGMENT__GFX11 = 0x00000005, - CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_IN__GFX11 = 0x0000000b, - CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_OUT__GFX11 = 0x0000000c, - CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_IN__GFX11 = 0x0000000d, - CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_OUT__GFX11 = 0x0000000e, - CB_PERF_SEL_DB_CB_EXPORT_VALID_READY__GFX11 = 0x0000000f, - CB_PERF_SEL_DB_CB_EXPORT_VALID_READYB__GFX11 = 0x00000010, - CB_PERF_SEL_DB_CB_EXPORT_VALIDB_READY__GFX11 = 0x00000011, - CB_PERF_SEL_DB_CB_EXPORT_VALIDB_READYB__GFX11 = 0x00000012, - 
CB_PERF_SEL_CC_MA_WRITE_REQUEST__GFX11 = 0x00000013, - CB_PERF_SEL_CC_MA_WRITE_REQUESTS_IN_FLIGHT__GFX11 = 0x00000014, - CB_PERF_SEL_CC_MA_READ_REQUEST__GFX11 = 0x00000015, - CB_PERF_SEL_CC_MA_READ_REQUESTS_IN_FLIGHT__GFX11 = 0x00000016, - CB_PERF_SEL_FDCC_FMASK_DECOMPRESS_BYTES_OUT__GFX11 = 0x00000017, - CB_PERF_SEL_FDCC_FMASK_COMPRESS_BYTES_IN__GFX11 = 0x00000018, - CB_PERF_SEL_CC_FDCC_COMPRESS_FRAG_TIDS_IN__GFX11 = 0x00000019, - CB_PERF_SEL_CC_FDCC_DECOMPRESS_FRAG_TIDS_OUT__GFX11 = 0x0000001a, - CB_PERF_SEL_DC_MA_WRITE_REQUEST__GFX11 = 0x0000001e, - CB_PERF_SEL_DC_MA_WRITE_REQUESTS_IN_FLIGHT__GFX11 = 0x0000001f, - CB_PERF_SEL_DC_MA_READ_REQUEST__GFX11 = 0x00000020, - CB_PERF_SEL_DC_MA_READ_REQUESTS_IN_FLIGHT__GFX11 = 0x00000021, - CB_PERF_SEL_CB_RMI_WRREQ_VALID_READY__GFX11 = 0x00000022, - CB_PERF_SEL_CB_RMI_WRREQ_VALID_READYB__GFX11 = 0x00000023, - CB_PERF_SEL_CB_RMI_WRREQ_VALIDB_READY__GFX11 = 0x00000024, - CB_PERF_SEL_CB_RMI_WRREQ_VALIDB_READYB__GFX11 = 0x00000025, - CB_PERF_SEL_CB_RMI_RDREQ_VALID_READY__GFX11 = 0x00000026, - CB_PERF_SEL_CB_RMI_RDREQ_VALID_READYB__GFX11 = 0x00000027, - CB_PERF_SEL_CB_RMI_RDREQ_VALIDB_READY__GFX11 = 0x00000028, - CB_PERF_SEL_CB_RMI_RDREQ_VALIDB_READYB__GFX11 = 0x00000029, - CB_PERF_SEL_NACK_CC_READ__GFX11 = 0x0000002a, - CB_PERF_SEL_NACK_CC_WRITE__GFX11 = 0x0000002b, - CB_PERF_SEL_DC_MA_EARLY_WRITE_RETURN__GFX11 = 0x0000002c, - CB_PERF_SEL_CC_MA_EARLY_WRITE_RETURN__GFX11 = 0x0000002d, - CB_PERF_SEL_DC_MA_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX11 = 0x0000002e, - CB_PERF_SEL_CC_MA_EARLY_WRITE_REQUESTS_IN_FLIGHT__GFX11 = 0x0000002f, - CB_PERF_SEL_CC_DCC_RDREQ_STALL__GFX11 = 0x00000030, - CB_PERF_SEL_EVENT__GFX11 = 0x00000032, - CB_PERF_SEL_EVENT_CACHE_FLUSH_TS__GFX11 = 0x00000033, - CB_PERF_SEL_EVENT_CONTEXT_DONE__GFX11 = 0x00000034, - CB_PERF_SEL_EVENT_CACHE_FLUSH__GFX11 = 0x00000035, - CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_TS_EVENT__GFX11 = 0x00000036, - CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_EVENT__GFX11 = 0x00000037, - 
CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_DATA_TS__GFX11 = 0x00000038, - CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_META__GFX11 = 0x00000039, - CB_PERF_SEL_EVENT_BOTTOM_OF_PIPE_TS__GFX11 = 0x0000003a, - CB_PERF_SEL_STATIC_CLOCK_EN__GFX11 = 0x0000003c, - CB_PERF_SEL_PERFMON_CLOCK_EN__GFX11 = 0x0000003d, - CB_PERF_SEL_BLEND_CLOCK_EN__GFX11 = 0x0000003e, - CB_PERF_SEL_COLOR_STORE_CLOCK_EN__GFX11 = 0x0000003f, - CB_PERF_SEL_BACKEND_READ_CLOCK_EN__GFX11 = 0x00000040, - CB_PERF_SEL_GRBM_CLOCK_EN__GFX11 = 0x00000041, - CB_PERF_SEL_MEMARB_CLOCK_EN__GFX11 = 0x00000042, - CB_PERF_SEL_BACKEND_EVICT_PIPE_CLOCK_EN__GFX11 = 0x00000043, - CB_PERF_SEL_BACKEND_FRAGOP_CLOCK_EN__GFX11 = 0x00000044, - CB_PERF_SEL_BACKEND_SRC_FIFO_CLOCK_EN__GFX11 = 0x00000045, - CB_PERF_SEL_BACKEND_CACHE_CTL_CLOCK_EN__GFX11 = 0x00000046, - CB_PERF_SEL_FRONTEND_INPUT_CLOCK_EN__GFX11 = 0x00000047, - CB_PERF_SEL_FRONTEND_ADDR_CLOCK_EN__GFX11 = 0x00000048, - CB_PERF_SEL_FRONTEND_FDCC_CLOCK_EN__GFX11 = 0x00000049, - CB_PERF_SEL_FRONTEND_SAMPLE_MASK_TRACKER_CLOCK_EN__GFX11 = 0x0000004a, - CB_PERF_SEL_EVENTS_CLK_EN__GFX11 = 0x0000004b, - CB_PERF_SEL_CC_TAG_HIT__GFX11 = 0x00000050, - CB_PERF_SEL_CC_CACHE_TAG_MISS__GFX11 = 0x00000051, - CB_PERF_SEL_CC_CACHE_SECTOR_MISS__GFX11 = 0x00000052, - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX11 = 0x00000053, - CB_PERF_SEL_CC_CACHE_READ_OUTPUT_STALL__GFX11 = 0x00000058, - CB_PERF_SEL_CC_CACHE_WRITE_OUTPUT_STALL__GFX11 = 0x00000059, - CB_PERF_SEL_CC_CACHE_ACK_OUTPUT_STALL__GFX11 = 0x0000005a, - CB_PERF_SEL_CC_CACHE_STALL__GFX11 = 0x0000005b, - CB_PERF_SEL_CC_CACHE_FLUSH__GFX11 = 0x0000005c, - CB_PERF_SEL_CC_CACHE_SECTORS_FLUSHED__GFX11 = 0x0000005d, - CB_PERF_SEL_CC_CACHE_WA_TO_RMW_CONVERSION__GFX11 = 0x0000005e, - CB_PERF_SEL_CC_CACHE_QBLOCKS_FLUSHED__GFX11 = 0x0000005f, - CB_PERF_SEL_CC_CACHE_DIRTY_QBLOCKS_FLUSHED__GFX11 = 0x00000060, - CB_PERF_SEL_CC_CACHE_READS_SAVED_DUE_TO_DCC__GFX11 = 0x00000061, - CB_PERF_SEL_CCC_IN_EVICT_HAZARD_STALL__GFX11 = 0x00000062, - 
CB_PERF_SEL_CCC_COLOR_RESOURCE_PANIC__GFX11 = 0x00000063, - CB_PERF_SEL_CCC_FMASK_RESOURCE_PANIC__GFX11 = 0x00000064, - CB_PERF_SEL_CCC_FREE_WAYS_PANIC__GFX11 = 0x00000065, - CB_PERF_SEL_CCC_SKID_FIFO_FULL__GFX11 = 0x00000066, - CB_PERF_SEL_CCC_SKID_FIFO_STALL__GFX11 = 0x00000067, - CB_PERF_SEL_CCC_COLOR_RESOURCE_STALL__GFX11 = 0x00000068, - CB_PERF_SEL_CCC_FMASK_RESOURCE_STALL__GFX11 = 0x00000069, - CB_PERF_SEL_CCC_FREE_WAYS_STALL__GFX11 = 0x0000006a, - CB_PERF_SEL_CCC_KEY_XFR_RETURN_STALL__GFX11 = 0x0000006b, - CB_PERF_SEL_BE_SRCFIFO_FULL__GFX11 = 0x0000006e, - CB_PERF_SEL_BE_RDLATFIFO_FULL__GFX11 = 0x0000006f, - CB_PERF_SEL_RDLAT_FIFO_QUAD_RESIDENCY_STALL__GFX11 = 0x00000070, - CB_PERF_SEL_CC_QUADFRAG_VALID_READY__GFX11 = 0x00000071, - CB_PERF_SEL_CC_QUADFRAG_VALID_READYB__GFX11 = 0x00000072, - CB_PERF_SEL_CC_QUADFRAG_VALIDB_READY__GFX11 = 0x00000073, - CB_PERF_SEL_CC_QUADFRAG_VALIDB_READYB__GFX11 = 0x00000074, - CB_PERF_SEL_CC_MA_WRITE_REQUEST_PARTIAL__GFX11 = 0x00000075, - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VALID_READY__GFX11 = 0x00000076, - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VALID_READYB__GFX11 = 0x00000077, - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VALIDB_READY__GFX11 = 0x00000078, - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VALIDB_READYB__GFX11 = 0x00000079, - CB_PERF_SEL_READ_REQ_PIPE3_STALL__GFX11 = 0x00000080, - CB_PERF_SEL_READ_REQ_PIPE1_STALL__GFX11 = 0x00000081, - CB_PERF_SEL_DCC_CACHE_TAG_HIT__GFX11 = 0x00000082, - CB_PERF_SEL_DCC_CACHE_TAG_MISS__GFX11 = 0x00000083, - CB_PERF_SEL_DCC_CACHE_SECTOR_MISS__GFX11 = 0x00000084, - CB_PERF_SEL_DCC_CACHE_REEVICTION_STALL__GFX11 = 0x00000085, - CB_PERF_SEL_DCC_CACHE_EVICT_NONZERO_INFLIGHT_STALL__GFX11 = 0x00000086, - CB_PERF_SEL_DCC_CACHE_REPLACE_PENDING_EVICT_STALL__GFX11 = 0x00000087, - CB_PERF_SEL_DCC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX11 = 0x00000088, - CB_PERF_SEL_DCC_CACHE_READ_OUTPUT_STALL__GFX11 = 0x00000089, - CB_PERF_SEL_DCC_CACHE_WRITE_OUTPUT_STALL__GFX11 = 0x0000008a, - CB_PERF_SEL_DCC_CACHE_ACK_OUTPUT_STALL__GFX11 = 
0x0000008b, - CB_PERF_SEL_DCC_CACHE_STALL__GFX11 = 0x0000008c, - CB_PERF_SEL_DCC_CACHE_FLUSH__GFX11 = 0x0000008d, - CB_PERF_SEL_DCC_CACHE_SECTORS_FLUSHED__GFX11 = 0x0000008e, - CB_PERF_SEL_DCC_CACHE_DIRTY_SECTORS_FLUSHED__GFX11 = 0x0000008f, - CB_PERF_SEL_DCC_CACHE_TAGS_FLUSHED__GFX11 = 0x00000090, - CB_PERF_SEL_DCC_COMP_SECTORS_IN__GFX11 = 0x00000091, - CB_PERF_SEL_KEYID_RDLAT_FIFO_FULL__GFX11 = 0x00000095, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_BOTH__GFX11 = 0x00000096, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_LEFT__GFX11 = 0x00000097, - CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_RIGHT__GFX11 = 0x00000098, - CB_PERF_SEL_DCC_KEY_VALUE_CLEAR__GFX11 = 0x0000009b, - CB_PERF_SEL_SMT_OUT_XFR_FEA__GFX11 = 0x000000a0, - CB_PERF_SEL_SMT_FULLY_ON_PROBE__GFX11 = 0x000000a1, - CB_PERF_SEL_SMT_PARTIAL_ON_PROBE__GFX11 = 0x000000a2, - CB_PERF_SEL_SMT_OUT_XFR__GFX11 = 0x000000a3, - CB_PERF_SEL_SMT_OUT_GP_XFR__GFX11 = 0x000000a4, - CB_PERF_SEL_SMT_IN_XFR__GFX11 = 0x000000a5, - CB_PERF_SEL_SMT_CACHE_PROBE__GFX11 = 0x000000a6, - CB_PERF_SEL_SMT_MISS_ON_PROBE__GFX11 = 0x000000a7, - CB_PERF_SEL_SMT_HIT_ON_PROBE__GFX11 = 0x000000a8, - CB_PERF_SEL_SMT_IS_STALLED__GFX11 = 0x000000a9, - CB_PERF_SEL_SMT_QUAD_PTR_FIFO_IS_FULL__GFX11 = 0x000000aa, - CB_PERF_SEL_SMT_TOTAL_OVERWRITTEN_SECTORS__GFX11 = 0x000000ab, - CB_PERF_SEL_BLEND_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED__GFX11 = 0x000000b4, - CB_PERF_SEL_BLEND_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED__GFX11 = 0x000000b5, - CB_PERF_SEL_BLEND_QUAD_COULD_HAVE_BEEN_DISCARDED__GFX11 = 0x000000b6, - CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST__GFX11 = 0x000000b7, - CB_PERF_SEL_BLEND_STALL_AT_OUTPUT__GFX11 = 0x000000b8, - CB_PERF_SEL_BLEND_STALL_ON_CACHE_ACCESS__GFX11 = 0x000000b9, - CB_PERF_SEL_BLEND_COLLISION_DUE_TO_CACHE_WRITE__GFX11 = 0x000000ba, - CB_PERF_SEL_BLEND_RAW_HAZARD_STALL__GFX11 = 0x000000bb, - CB_PERF_SEL_BE_CS_FILLRATE_1X2__GFX11 = 0x000000be, - CB_PERF_SEL_BE_CS_FILLRATE_2X1__GFX11 = 0x000000bf, - CB_PERF_SEL_BE_CS_FILLRATE_2X2__GFX11 = 0x000000c0, - 
CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_2TO1__GFX11 = 0x000000c8, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_2TO2__GFX11 = 0x000000c9, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO1__GFX11 = 0x000000ca, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO2__GFX11 = 0x000000cb, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO3__GFX11 = 0x000000cc, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO4__GFX11 = 0x000000cd, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO1__GFX11 = 0x000000ce, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO2__GFX11 = 0x000000cf, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO3__GFX11 = 0x000000d0, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO4__GFX11 = 0x000000d1, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO5__GFX11 = 0x000000d2, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO6__GFX11 = 0x000000d3, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO1__GFX11 = 0x000000d4, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO2__GFX11 = 0x000000d5, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO3__GFX11 = 0x000000d6, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO4__GFX11 = 0x000000d7, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO5__GFX11 = 0x000000d8, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO6__GFX11 = 0x000000d9, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO7__GFX11 = 0x000000da, - CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO8__GFX11 = 0x000000db, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO1_1MAXFRAGS__GFX11 = 0x000000dc, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO2_1MAXFRAGS__GFX11 = 0x000000dd, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO3_1MAXFRAGS__GFX11 = 0x000000de, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO4_1MAXFRAGS__GFX11 = 0x000000df, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO1_2MAXFRAGS__GFX11 = 0x000000e0, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO2_2MAXFRAGS__GFX11 = 0x000000e1, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO3_2MAXFRAGS__GFX11 = 0x000000e2, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO4_2MAXFRAGS__GFX11 = 0x000000e3, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO1_3MAXFRAGS__GFX11 = 0x000000e4, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO2_3MAXFRAGS__GFX11 = 0x000000e5, - 
CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO3_3MAXFRAGS__GFX11 = 0x000000e6, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO4_3MAXFRAGS__GFX11 = 0x000000e7, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO1_4MAXFRAGS__GFX11 = 0x000000e8, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO2_4MAXFRAGS__GFX11 = 0x000000e9, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO3_4MAXFRAGS__GFX11 = 0x000000ea, - CB_PERF_SEL_CC_FDCC_COMPRESS_RATIO_4TO4_4MAXFRAGS__GFX11 = 0x000000eb, - CB_PERF_SEL_CC_FDCC_COMPRESS_1X1__GFX11 = 0x000000ec, - CB_PERF_SEL_CC_FDCC_COMPRESS_1X2__GFX11 = 0x000000ed, - CB_PERF_SEL_CC_FDCC_COMPRESS_2X1__GFX11 = 0x000000ee, - CB_PERF_SEL_CC_FDCC_COMPRESS_2X2__GFX11 = 0x000000ef, - CB_PERF_SEL_FORMAT_IS_32_R__GFX11 = 0x000000fa, - CB_PERF_SEL_FORMAT_IS_32_AR__GFX11 = 0x000000fb, - CB_PERF_SEL_FORMAT_IS_32_GR__GFX11 = 0x000000fc, - CB_PERF_SEL_FORMAT_IS_32_ABGR__GFX11 = 0x000000fd, - CB_PERF_SEL_FORMAT_IS_FP16_ABGR__GFX11 = 0x000000fe, - CB_PERF_SEL_FORMAT_IS_SIGNED16_ABGR__GFX11 = 0x000000ff, - CB_PERF_SEL_FORMAT_IS_UNSIGNED16_ABGR__GFX11 = 0x00000100, - CB_PERF_SEL_FORMAT_IS_32BPP_8PIX__GFX11 = 0x00000101, - CB_PERF_SEL_FORMAT_IS_16_16_UNSIGNED_8PIX__GFX11 = 0x00000102, - CB_PERF_SEL_FORMAT_IS_16_16_SIGNED_8PIX__GFX11 = 0x00000103, - CB_PERF_SEL_FORMAT_IS_16_16_FLOAT_8PIX__GFX11 = 0x00000104, - CB_PERF_SEL_EXPORT_ADDED_1_FRAGMENT__GFX11 = 0x00000105, - CB_PERF_SEL_EXPORT_ADDED_2_FRAGMENTS__GFX11 = 0x00000106, - CB_PERF_SEL_EXPORT_ADDED_3_FRAGMENTS__GFX11 = 0x00000107, - CB_PERF_SEL_EXPORT_ADDED_4_FRAGMENTS__GFX11 = 0x00000108, - CB_PERF_SEL_EXPORT_ADDED_5_FRAGMENTS__GFX11 = 0x00000109, - CB_PERF_SEL_EXPORT_ADDED_6_FRAGMENTS__GFX11 = 0x0000010a, - CB_PERF_SEL_EXPORT_ADDED_7_FRAGMENTS__GFX11 = 0x0000010b, - CB_PERF_SEL_EXPORT_BLEND_OPT_DONT_READ_DST__GFX11 = 0x0000010c, - CB_PERF_SEL_EXPORT_BLEND_OPT_BLEND_BYPASS__GFX11 = 0x0000010d, - CB_PERF_SEL_EXPORT_BLEND_OPT_DISCARD_PIXELS__GFX11 = 0x0000010e, - CB_PERF_SEL_EXPORT_HAS_1_FRAGMENT_BEFORE_UPDATE__GFX11 = 0x0000010f, - 
CB_PERF_SEL_EXPORT_HAS_1_FRAGMENT_AFTER_UPDATE__GFX11 = 0x00000110, - CB_PERF_SEL_EXPORT_HAS_2_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x00000111, - CB_PERF_SEL_EXPORT_HAS_2_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x00000112, - CB_PERF_SEL_EXPORT_HAS_3_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x00000113, - CB_PERF_SEL_EXPORT_HAS_3_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x00000114, - CB_PERF_SEL_EXPORT_HAS_4_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x00000115, - CB_PERF_SEL_EXPORT_HAS_4_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x00000116, - CB_PERF_SEL_EXPORT_HAS_5_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x00000117, - CB_PERF_SEL_EXPORT_HAS_5_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x00000118, - CB_PERF_SEL_EXPORT_HAS_6_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x00000119, - CB_PERF_SEL_EXPORT_HAS_6_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x0000011a, - CB_PERF_SEL_EXPORT_HAS_7_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x0000011b, - CB_PERF_SEL_EXPORT_HAS_7_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x0000011c, - CB_PERF_SEL_EXPORT_HAS_8_FRAGMENTS_BEFORE_UPDATE__GFX11 = 0x0000011d, - CB_PERF_SEL_EXPORT_HAS_8_FRAGMENTS_AFTER_UPDATE__GFX11 = 0x0000011e, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_0__GFX11 = 0x0000011f, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_1__GFX11 = 0x00000120, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_2__GFX11 = 0x00000121, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_3__GFX11 = 0x00000122, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_4__GFX11 = 0x00000123, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_5__GFX11 = 0x00000124, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_6__GFX11 = 0x00000125, - CB_PERF_SEL_EXPORT_READS_FRAGMENT_7__GFX11 = 0x00000126, - CB_PERF_SEL_EXPORT_REMOVED_1_FRAGMENT__GFX11 = 0x00000127, - CB_PERF_SEL_EXPORT_REMOVED_2_FRAGMENTS__GFX11 = 0x00000128, - CB_PERF_SEL_EXPORT_REMOVED_3_FRAGMENTS__GFX11 = 0x00000129, - CB_PERF_SEL_EXPORT_REMOVED_4_FRAGMENTS__GFX11 = 0x0000012a, - CB_PERF_SEL_EXPORT_REMOVED_5_FRAGMENTS__GFX11 = 0x0000012b, - CB_PERF_SEL_EXPORT_REMOVED_6_FRAGMENTS__GFX11 = 0x0000012c, - CB_PERF_SEL_EXPORT_REMOVED_7_FRAGMENTS__GFX11 = 0x0000012d, - 
CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_0__GFX11 = 0x0000012e, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_1__GFX11 = 0x0000012f, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_2__GFX11 = 0x00000130, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_3__GFX11 = 0x00000131, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_4__GFX11 = 0x00000132, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_5__GFX11 = 0x00000133, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_6__GFX11 = 0x00000134, - CB_PERF_SEL_EXPORT_WRITES_FRAGMENT_7__GFX11 = 0x00000135, - CB_PERF_SEL_EXPORT_KILLED_BY_COLOR_INVALID__GFX11 = 0x00000136, - CB_PERF_SEL_EXPORT_KILLED_BY_DISCARD_PIXEL__GFX11 = 0x00000137, - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_SAMPLE_MASK__GFX11 = 0x00000138, - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11 = 0x00000139, -#endif - CB_PERF_SEL_FC_DCC_CACHE_SECTORS_FLUSHED__RN = 0x000000fd, - CB_PERF_SEL_FC_DCC_CACHE_DIRTY_SECTORS_FLUSHED__RN = 0x000000fe, - CB_PERF_SEL_FC_DCC_CACHE_TAGS_FLUSHED__RN = 0x000000ff, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_REG__RN = 0x000001b6, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_SINGLE__RN = 0x000001b7, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC00__RN = 0x000001b8, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC01__RN = 0x000001b9, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC10__RN = 0x000001ba, - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC11__RN = 0x000001bb, - CB_PERF_SEL_FC_DCC_CACHE_TAGS_FLUSHED__VG10_VG12_VG20_RV1X_RV2X = 0x000000fd, - CB_PERF_SEL_FC_DCC_CACHE_SECTORS_FLUSHED__VG10_VG12_VG20_RV1X_RV2X = 0x000000fe, - CB_PERF_SEL_FC_DCC_CACHE_DIRTY_SECTORS_FLUSHED__VG10_VG12_VG20_RV1X_RV2X = 0x000000ff, -} CBPerfSel; - -constexpr unsigned int MaxCBPerfSelVg10_Vg12_Vg20_Rv1x_Rv2x = CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10; -constexpr unsigned int MaxCBPerfSelRn = CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC11__RN; -constexpr unsigned int MaxCBPerfSelGfx10Core = CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 
-constexpr unsigned int MaxCBPerfSelGfx11 = CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum CBRamList { - CB_DCG_CCC_CAS_TAG_ARRAY = 0x00000000, - CB_DCG_CCC_CAS_FRAG_PTR = 0x00000001, - CB_DCG_CCC_CAS_COLOR_PTR = 0x00000002, - CB_DCG_CCC_CAS_SURF_PARAM = 0x00000003, - CB_DCG_CCC_CAS_KEYID = 0x00000004, - CB_DCG_BACKEND_RDLAT_FIFO = 0x00000005, - CB_DCG_FRONTEND_RDLAT_FIFO = 0x00000006, - CB_DCG_SRC_FIFO = 0x00000007, - CB_DCG_COLOR_STORE = 0x00000008, - CB_DCG_COLOR_STORE_DIRTY_BYTE = 0x00000009, - CB_DCG_FMASK_CACHE_STORE = 0x0000000a, - CB_DCG_READ_SKID_FIFO = 0x0000000b, - CB_DCG_QUAD_PTR_FIFO = 0x0000000c, - CB_DCG_OUTPUT_FIFO = 0x0000000d, - CB_DCG_DCC_CACHE = 0x0000000e, - CB_DCG_DCC_DIRTY_BITS = 0x0000000f, - CB_DCG_FOP_MAX_FRAG = 0x00000010, - CB_DCG_QBLOCK_ALLOC = 0x00000011, - CB_DCG_DCC_COMP_RAM = 0x00000012, - CB_DCG_DCC_DECOMP_RAM = 0x00000013, -} CBRamList; -#endif - -typedef enum CHA_PERF_SEL { - CHA_PERF_SEL_BUSY = 0x00000000, - CHA_PERF_SEL_STALL_CHC0 = 0x00000001, - CHA_PERF_SEL_STALL_CHC1 = 0x00000002, - CHA_PERF_SEL_STALL_CHC2 = 0x00000003, - CHA_PERF_SEL_STALL_CHC3 = 0x00000004, -#if CHIP_HDR_PHOENIX1 - CHA_PERF_SEL_REQUEST_CHC0__APU11 = 0x00000005, - CHA_PERF_SEL_REQUEST_CHC1__APU11 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC2__APU11 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC3__APU11 = 0x00000008, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__APU11 = 0x00000009, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__APU11 = 0x0000000a, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__APU11 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__APU11 = 0x0000000c, - CHA_PERF_SEL_IO_32B_WDS_CHC0__APU11 = 0x0000000d, - CHA_PERF_SEL_IO_32B_WDS_CHC1__APU11 = 0x0000000e, - CHA_PERF_SEL_IO_32B_WDS_CHC2__APU11 = 0x0000000f, - CHA_PERF_SEL_IO_32B_WDS_CHC3__APU11 = 0x00000010, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__APU11 = 0x00000011, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__APU11 = 0x00000012, - 
CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__APU11 = 0x00000013, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__APU11 = 0x00000014, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__APU11 = 0x00000015, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__APU11 = 0x00000016, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__APU11 = 0x00000017, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__APU11 = 0x00000018, - CHA_PERF_SEL_ARB_REQUESTS__APU11 = 0x00000019, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__APU11 = 0x0000001a, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__APU11 = 0x0000001b, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__APU11 = 0x0000001c, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__APU11 = 0x0000001d, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__APU11 = 0x0000001e, - CHA_PERF_SEL_CYCLE__APU11 = 0x0000001f, -#endif - CHA_PERF_SEL_STALL_CHC4__GFX101 = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__GFX101 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__GFX101 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__GFX101 = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__GFX101 = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__GFX101 = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__GFX101 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__GFX101 = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__GFX101 = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__GFX101 = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__GFX101 = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__GFX101 = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__GFX101 = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__GFX101 = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__GFX101 = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__GFX101 = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__GFX101 = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__GFX101 = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__GFX101 = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__GFX101 = 0x00000018, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__GFX101 = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__GFX101 = 0x0000001a, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__GFX101 = 0x0000001b, - 
CHA_PERF_SEL_IO_BURST_COUNT_CHC1__GFX101 = 0x0000001c, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__GFX101 = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__GFX101 = 0x0000001e, - CHA_PERF_SEL_IO_BURST_COUNT_CHC4__GFX101 = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__GFX101 = 0x00000020, - CHA_PERF_SEL_REQ_ARB_LEVEL_CHC0__GFX101 = 0x00000021, - CHA_PERF_SEL_REQ_ARB_LEVEL_CHC1__GFX101 = 0x00000022, - CHA_PERF_SEL_REQ_ARB_LEVEL_CHC2__GFX101 = 0x00000023, - CHA_PERF_SEL_REQ_ARB_LEVEL_CHC3__GFX101 = 0x00000024, - CHA_PERF_SEL_REQ_ARB_LEVEL_CHC4__GFX101 = 0x00000025, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__GFX101 = 0x00000026, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__GFX101 = 0x00000027, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__GFX101 = 0x00000028, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__GFX101 = 0x00000029, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__GFX101 = 0x0000002a, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__GFX101 = 0x0000002b, - CHA_PERF_SEL_CYCLE__GFX101 = 0x0000002c, -#if CHIP_HDR_NAVI21 - CHA_PERF_SEL_STALL_CHC4__NV21 = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__NV21 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__NV21 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__NV21 = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__NV21 = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__NV21 = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__NV21 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__NV21 = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__NV21 = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__NV21 = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__NV21 = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__NV21 = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__NV21 = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__NV21 = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__NV21 = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__NV21 = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__NV21 = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__NV21 = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__NV21 = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__NV21 = 0x00000018, - 
CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__NV21 = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__NV21 = 0x0000001a, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__NV21 = 0x0000001b, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__NV21 = 0x0000001c, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__NV21 = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__NV21 = 0x0000001e, - CHA_PERF_SEL_IO_BURST_COUNT_CHC4__NV21 = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__NV21 = 0x00000020, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__NV21 = 0x00000021, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__NV21 = 0x00000022, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__NV21 = 0x00000023, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__NV21 = 0x00000024, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__NV21 = 0x00000025, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__NV21 = 0x00000026, - CHA_PERF_SEL_CYCLE__NV21 = 0x00000027, -#endif -#if CHIP_HDR_NAVI22 - CHA_PERF_SEL_STALL_CHC4__NV22 = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__NV22 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__NV22 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__NV22 = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__NV22 = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__NV22 = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__NV22 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__NV22 = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__NV22 = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__NV22 = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__NV22 = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__NV22 = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__NV22 = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__NV22 = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__NV22 = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__NV22 = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__NV22 = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__NV22 = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__NV22 = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__NV22 = 0x00000018, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__NV22 = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__NV22 = 0x0000001a, - 
CHA_PERF_SEL_IO_BURST_COUNT_CHC0__NV22 = 0x0000001b, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__NV22 = 0x0000001c, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__NV22 = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__NV22 = 0x0000001e, - CHA_PERF_SEL_IO_BURST_COUNT_CHC4__NV22 = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__NV22 = 0x00000020, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__NV22 = 0x00000021, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__NV22 = 0x00000022, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__NV22 = 0x00000023, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__NV22 = 0x00000024, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__NV22 = 0x00000025, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__NV22 = 0x00000026, - CHA_PERF_SEL_CYCLE__NV22 = 0x00000027, -#endif -#if CHIP_HDR_NAVI23 - CHA_PERF_SEL_STALL_CHC4__NV23 = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__NV23 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__NV23 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__NV23 = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__NV23 = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__NV23 = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__NV23 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__NV23 = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__NV23 = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__NV23 = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__NV23 = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__NV23 = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__NV23 = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__NV23 = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__NV23 = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__NV23 = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__NV23 = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__NV23 = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__NV23 = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__NV23 = 0x00000018, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__NV23 = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__NV23 = 0x0000001a, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__NV23 = 0x0000001b, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__NV23 = 0x0000001c, - 
CHA_PERF_SEL_IO_BURST_COUNT_CHC2__NV23 = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__NV23 = 0x0000001e, - CHA_PERF_SEL_IO_BURST_COUNT_CHC4__NV23 = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__NV23 = 0x00000020, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__NV23 = 0x00000021, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__NV23 = 0x00000022, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__NV23 = 0x00000023, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__NV23 = 0x00000024, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__NV23 = 0x00000025, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__NV23 = 0x00000026, - CHA_PERF_SEL_CYCLE__NV23 = 0x00000027, -#endif -#if CHIP_HDR_NAVI24 - CHA_PERF_SEL_STALL_CHC4__NV24 = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__NV24 = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__NV24 = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__NV24 = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__NV24 = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__NV24 = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__NV24 = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__NV24 = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__NV24 = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__NV24 = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__NV24 = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__NV24 = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__NV24 = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__NV24 = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__NV24 = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__NV24 = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__NV24 = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__NV24 = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__NV24 = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__NV24 = 0x00000018, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__NV24 = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__NV24 = 0x0000001a, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__NV24 = 0x0000001b, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__NV24 = 0x0000001c, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__NV24 = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__NV24 = 0x0000001e, - 
CHA_PERF_SEL_IO_BURST_COUNT_CHC4__NV24 = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__NV24 = 0x00000020, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__NV24 = 0x00000021, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__NV24 = 0x00000022, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__NV24 = 0x00000023, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__NV24 = 0x00000024, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__NV24 = 0x00000025, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__NV24 = 0x00000026, - CHA_PERF_SEL_CYCLE__NV24 = 0x00000027, -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - CHA_PERF_SEL_STALL_CHC4__NV3X = 0x00000005, - CHA_PERF_SEL_STALL_CHC5__NV3X = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC0__NV3X = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC1__NV3X = 0x00000008, - CHA_PERF_SEL_REQUEST_CHC2__NV3X = 0x00000009, - CHA_PERF_SEL_REQUEST_CHC3__NV3X = 0x0000000a, - CHA_PERF_SEL_REQUEST_CHC4__NV3X = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__NV3X = 0x0000000c, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__NV3X = 0x0000000d, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__NV3X = 0x0000000e, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__NV3X = 0x0000000f, - CHA_PERF_SEL_MEM_32B_WDS_CHC4__NV3X = 0x00000010, - CHA_PERF_SEL_IO_32B_WDS_CHC0__NV3X = 0x00000011, - CHA_PERF_SEL_IO_32B_WDS_CHC1__NV3X = 0x00000012, - CHA_PERF_SEL_IO_32B_WDS_CHC2__NV3X = 0x00000013, - CHA_PERF_SEL_IO_32B_WDS_CHC3__NV3X = 0x00000014, - CHA_PERF_SEL_IO_32B_WDS_CHC4__NV3X = 0x00000015, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__NV3X = 0x00000016, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__NV3X = 0x00000017, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__NV3X = 0x00000018, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__NV3X = 0x00000019, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC4__NV3X = 0x0000001a, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__NV3X = 0x0000001b, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__NV3X = 0x0000001c, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__NV3X = 0x0000001d, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__NV3X = 0x0000001e, - CHA_PERF_SEL_IO_BURST_COUNT_CHC4__NV3X = 0x0000001f, - CHA_PERF_SEL_ARB_REQUESTS__NV3X = 
0x00000020, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__NV3X = 0x00000021, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__NV3X = 0x00000022, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__NV3X = 0x00000023, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__NV3X = 0x00000024, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__NV3X = 0x00000025, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC4__NV3X = 0x00000026, - CHA_PERF_SEL_CYCLE__NV3X = 0x00000027, -#endif - CHA_PERF_SEL_REQUEST_CHC0__RAPHAEL = 0x00000005, - CHA_PERF_SEL_REQUEST_CHC1__RAPHAEL = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC2__RAPHAEL = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC3__RAPHAEL = 0x00000008, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__RAPHAEL = 0x00000009, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__RAPHAEL = 0x0000000a, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__RAPHAEL = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__RAPHAEL = 0x0000000c, - CHA_PERF_SEL_IO_32B_WDS_CHC0__RAPHAEL = 0x0000000d, - CHA_PERF_SEL_IO_32B_WDS_CHC1__RAPHAEL = 0x0000000e, - CHA_PERF_SEL_IO_32B_WDS_CHC2__RAPHAEL = 0x0000000f, - CHA_PERF_SEL_IO_32B_WDS_CHC3__RAPHAEL = 0x00000010, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__RAPHAEL = 0x00000011, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__RAPHAEL = 0x00000012, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__RAPHAEL = 0x00000013, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__RAPHAEL = 0x00000014, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__RAPHAEL = 0x00000015, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__RAPHAEL = 0x00000016, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__RAPHAEL = 0x00000017, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__RAPHAEL = 0x00000018, - CHA_PERF_SEL_ARB_REQUESTS__RAPHAEL = 0x00000019, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__RAPHAEL = 0x0000001a, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__RAPHAEL = 0x0000001b, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__RAPHAEL = 0x0000001c, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__RAPHAEL = 0x0000001d, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__RAPHAEL = 0x0000001e, - CHA_PERF_SEL_CYCLE__RAPHAEL = 0x0000001f, - CHA_PERF_SEL_REQUEST_CHC0__REMBRANDT = 0x00000005, - 
CHA_PERF_SEL_REQUEST_CHC1__REMBRANDT = 0x00000006, - CHA_PERF_SEL_REQUEST_CHC2__REMBRANDT = 0x00000007, - CHA_PERF_SEL_REQUEST_CHC3__REMBRANDT = 0x00000008, - CHA_PERF_SEL_MEM_32B_WDS_CHC0__REMBRANDT = 0x00000009, - CHA_PERF_SEL_MEM_32B_WDS_CHC1__REMBRANDT = 0x0000000a, - CHA_PERF_SEL_MEM_32B_WDS_CHC2__REMBRANDT = 0x0000000b, - CHA_PERF_SEL_MEM_32B_WDS_CHC3__REMBRANDT = 0x0000000c, - CHA_PERF_SEL_IO_32B_WDS_CHC0__REMBRANDT = 0x0000000d, - CHA_PERF_SEL_IO_32B_WDS_CHC1__REMBRANDT = 0x0000000e, - CHA_PERF_SEL_IO_32B_WDS_CHC2__REMBRANDT = 0x0000000f, - CHA_PERF_SEL_IO_32B_WDS_CHC3__REMBRANDT = 0x00000010, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC0__REMBRANDT = 0x00000011, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC1__REMBRANDT = 0x00000012, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC2__REMBRANDT = 0x00000013, - CHA_PERF_SEL_MEM_BURST_COUNT_CHC3__REMBRANDT = 0x00000014, - CHA_PERF_SEL_IO_BURST_COUNT_CHC0__REMBRANDT = 0x00000015, - CHA_PERF_SEL_IO_BURST_COUNT_CHC1__REMBRANDT = 0x00000016, - CHA_PERF_SEL_IO_BURST_COUNT_CHC2__REMBRANDT = 0x00000017, - CHA_PERF_SEL_IO_BURST_COUNT_CHC3__REMBRANDT = 0x00000018, - CHA_PERF_SEL_ARB_REQUESTS__REMBRANDT = 0x00000019, - CHA_PERF_SEL_REQ_INFLIGHT_LEVEL__REMBRANDT = 0x0000001a, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC0__REMBRANDT = 0x0000001b, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC1__REMBRANDT = 0x0000001c, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC2__REMBRANDT = 0x0000001d, - CHA_PERF_SEL_STALL_RET_CONFLICT_CHC3__REMBRANDT = 0x0000001e, - CHA_PERF_SEL_CYCLE__REMBRANDT = 0x0000001f, -} CHA_PERF_SEL; - -constexpr unsigned int MaxChaPerfSelGfx101 = CHA_PERF_SEL_CYCLE__GFX101; -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -constexpr unsigned int MaxChaPerfSelNv3x = CHA_PERF_SEL_CYCLE__NV3X; -#endif -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxChaPerfSelNv24 = CHA_PERF_SEL_CYCLE__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxChaPerfSelNv23 = CHA_PERF_SEL_CYCLE__NV23; -#endif -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxChaPerfSelNv22 = 
CHA_PERF_SEL_CYCLE__NV22; -#endif -#if CHIP_HDR_NAVI21 -constexpr unsigned int MaxChaPerfSelNv21 = CHA_PERF_SEL_CYCLE__NV21; -#endif -#if CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxChaPerfSelApu11 = CHA_PERF_SEL_CYCLE__APU11; -#endif -constexpr unsigned int MaxChaPerfSelRaphael = CHA_PERF_SEL_CYCLE__RAPHAEL; -constexpr unsigned int MaxChaPerfSelRembrandt = CHA_PERF_SEL_CYCLE__REMBRANDT; - -typedef enum CHCG_PERF_SEL { - CHCG_PERF_SEL_CYCLE__GFX101 = 0x00000000, - CHCG_PERF_SEL_BUSY__GFX101 = 0x00000001, - CHCG_PERF_SEL_STARVE__GFX101 = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__GFX101 = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__GFX101 = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__GFX101 = 0x00000005, - CHCG_PERF_SEL_REQ__GFX101 = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__GFX101 = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__GFX101 = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__GFX101 = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__GFX101 = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__GFX101 = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__GFX101 = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__GFX101 = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__GFX101 = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__GFX101 = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__GFX101 = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__GFX101 = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__GFX101 = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__GFX101 = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__GFX101 = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__GFX101 = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__GFX101 = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__GFX101 = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__GFX101 = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__GFX101 = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__GFX101 = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__GFX101 = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__GFX101 = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__GFX101 = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__GFX101 = 
0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__GFX101 = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__GFX101 = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__GFX101 = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__GFX101 = 0x00000022, -#if CHIP_HDR_NAVI21 - CHCG_PERF_SEL_CYCLE__NV21 = 0x00000000, - CHCG_PERF_SEL_BUSY__NV21 = 0x00000001, - CHCG_PERF_SEL_STARVE__NV21 = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__NV21 = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__NV21 = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__NV21 = 0x00000005, - CHCG_PERF_SEL_REQ__NV21 = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__NV21 = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__NV21 = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__NV21 = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__NV21 = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__NV21 = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__NV21 = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__NV21 = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__NV21 = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__NV21 = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__NV21 = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__NV21 = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__NV21 = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__NV21 = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__NV21 = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__NV21 = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__NV21 = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__NV21 = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__NV21 = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__NV21 = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__NV21 = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__NV21 = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__NV21 = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__NV21 = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__NV21 = 0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__NV21 = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__NV21 = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__NV21 = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__NV21 = 0x00000022, - CHCG_PERF_SEL_REQ_CLIENT15__NV21 = 0x00000023, - 
CHCG_PERF_SEL_REQ_CLIENT16__NV21 = 0x00000024, - CHCG_PERF_SEL_REQ_CLIENT17__NV21 = 0x00000025, - CHCG_PERF_SEL_REQ_CLIENT18__NV21 = 0x00000026, - CHCG_PERF_SEL_REQ_CLIENT19__NV21 = 0x00000027, -#endif -#if CHIP_HDR_NAVI22 - CHCG_PERF_SEL_CYCLE__NV22 = 0x00000000, - CHCG_PERF_SEL_BUSY__NV22 = 0x00000001, - CHCG_PERF_SEL_STARVE__NV22 = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__NV22 = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__NV22 = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__NV22 = 0x00000005, - CHCG_PERF_SEL_REQ__NV22 = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__NV22 = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__NV22 = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__NV22 = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__NV22 = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__NV22 = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__NV22 = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__NV22 = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__NV22 = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__NV22 = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__NV22 = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__NV22 = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__NV22 = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__NV22 = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__NV22 = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__NV22 = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__NV22 = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__NV22 = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__NV22 = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__NV22 = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__NV22 = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__NV22 = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__NV22 = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__NV22 = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__NV22 = 0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__NV22 = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__NV22 = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__NV22 = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__NV22 = 0x00000022, - CHCG_PERF_SEL_REQ_CLIENT15__NV22 = 0x00000023, - 
CHCG_PERF_SEL_REQ_CLIENT16__NV22 = 0x00000024, - CHCG_PERF_SEL_REQ_CLIENT17__NV22 = 0x00000025, - CHCG_PERF_SEL_REQ_CLIENT18__NV22 = 0x00000026, - CHCG_PERF_SEL_REQ_CLIENT19__NV22 = 0x00000027, -#endif -#if CHIP_HDR_NAVI23 - CHCG_PERF_SEL_CYCLE__NV23 = 0x00000000, - CHCG_PERF_SEL_BUSY__NV23 = 0x00000001, - CHCG_PERF_SEL_STARVE__NV23 = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__NV23 = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__NV23 = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__NV23 = 0x00000005, - CHCG_PERF_SEL_REQ__NV23 = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__NV23 = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__NV23 = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__NV23 = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__NV23 = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__NV23 = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__NV23 = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__NV23 = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__NV23 = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__NV23 = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__NV23 = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__NV23 = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__NV23 = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__NV23 = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__NV23 = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__NV23 = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__NV23 = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__NV23 = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__NV23 = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__NV23 = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__NV23 = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__NV23 = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__NV23 = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__NV23 = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__NV23 = 0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__NV23 = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__NV23 = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__NV23 = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__NV23 = 0x00000022, - CHCG_PERF_SEL_REQ_CLIENT15__NV23 = 0x00000023, - 
CHCG_PERF_SEL_REQ_CLIENT16__NV23 = 0x00000024, - CHCG_PERF_SEL_REQ_CLIENT17__NV23 = 0x00000025, - CHCG_PERF_SEL_REQ_CLIENT18__NV23 = 0x00000026, - CHCG_PERF_SEL_REQ_CLIENT19__NV23 = 0x00000027, -#endif -#if CHIP_HDR_NAVI24 - CHCG_PERF_SEL_CYCLE__NV24 = 0x00000000, - CHCG_PERF_SEL_BUSY__NV24 = 0x00000001, - CHCG_PERF_SEL_STARVE__NV24 = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__NV24 = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__NV24 = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__NV24 = 0x00000005, - CHCG_PERF_SEL_REQ__NV24 = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__NV24 = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__NV24 = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__NV24 = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__NV24 = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__NV24 = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__NV24 = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__NV24 = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__NV24 = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__NV24 = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__NV24 = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__NV24 = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__NV24 = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__NV24 = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__NV24 = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__NV24 = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__NV24 = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__NV24 = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__NV24 = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__NV24 = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__NV24 = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__NV24 = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__NV24 = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__NV24 = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__NV24 = 0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__NV24 = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__NV24 = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__NV24 = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__NV24 = 0x00000022, - CHCG_PERF_SEL_REQ_CLIENT15__NV24 = 0x00000023, - 
CHCG_PERF_SEL_REQ_CLIENT16__NV24 = 0x00000024, - CHCG_PERF_SEL_REQ_CLIENT17__NV24 = 0x00000025, - CHCG_PERF_SEL_REQ_CLIENT18__NV24 = 0x00000026, - CHCG_PERF_SEL_REQ_CLIENT19__NV24 = 0x00000027, -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - CHCG_PERF_SEL_CYCLE__NV3X = 0x00000000, - CHCG_PERF_SEL_BUSY__NV3X = 0x00000001, - CHCG_PERF_SEL_STARVE__NV3X = 0x00000002, - CHCG_PERF_SEL_ARB_RET_LEVEL__NV3X = 0x00000003, - CHCG_PERF_SEL_GL2_REQ_READ_LATENCY__NV3X = 0x00000004, - CHCG_PERF_SEL_GL2_REQ_WRITE_LATENCY__NV3X = 0x00000005, - CHCG_PERF_SEL_REQ__NV3X = 0x00000006, - CHCG_PERF_SEL_REQ_ATOMIC_WITH_RET__NV3X = 0x00000007, - CHCG_PERF_SEL_REQ_ATOMIC_WITHOUT_RET__NV3X = 0x00000008, - CHCG_PERF_SEL_REQ_NOP_ACK__NV3X = 0x00000009, - CHCG_PERF_SEL_REQ_NOP_RTN0__NV3X = 0x0000000a, - CHCG_PERF_SEL_REQ_READ__NV3X = 0x0000000b, - CHCG_PERF_SEL_REQ_READ_128B__NV3X = 0x0000000c, - CHCG_PERF_SEL_REQ_READ_32B__NV3X = 0x0000000d, - CHCG_PERF_SEL_REQ_READ_64B__NV3X = 0x0000000e, - CHCG_PERF_SEL_REQ_WRITE__NV3X = 0x0000000f, - CHCG_PERF_SEL_REQ_WRITE_32B__NV3X = 0x00000010, - CHCG_PERF_SEL_REQ_WRITE_64B__NV3X = 0x00000011, - CHCG_PERF_SEL_STALL_GUS_GL1__NV3X = 0x00000012, - CHCG_PERF_SEL_STALL_BUFFER_FULL__NV3X = 0x00000013, - CHCG_PERF_SEL_REQ_CLIENT0__NV3X = 0x00000014, - CHCG_PERF_SEL_REQ_CLIENT1__NV3X = 0x00000015, - CHCG_PERF_SEL_REQ_CLIENT2__NV3X = 0x00000016, - CHCG_PERF_SEL_REQ_CLIENT3__NV3X = 0x00000017, - CHCG_PERF_SEL_REQ_CLIENT4__NV3X = 0x00000018, - CHCG_PERF_SEL_REQ_CLIENT5__NV3X = 0x00000019, - CHCG_PERF_SEL_REQ_CLIENT6__NV3X = 0x0000001a, - CHCG_PERF_SEL_REQ_CLIENT7__NV3X = 0x0000001b, - CHCG_PERF_SEL_REQ_CLIENT8__NV3X = 0x0000001c, - CHCG_PERF_SEL_REQ_CLIENT9__NV3X = 0x0000001d, - CHCG_PERF_SEL_REQ_CLIENT10__NV3X = 0x0000001e, - CHCG_PERF_SEL_REQ_CLIENT11__NV3X = 0x0000001f, - CHCG_PERF_SEL_REQ_CLIENT12__NV3X = 0x00000020, - CHCG_PERF_SEL_REQ_CLIENT13__NV3X = 0x00000021, - CHCG_PERF_SEL_REQ_CLIENT14__NV3X = 0x00000022, - 
CHCG_PERF_SEL_REQ_CLIENT15__NV3X = 0x00000023, - CHCG_PERF_SEL_REQ_CLIENT16__NV3X = 0x00000024, - CHCG_PERF_SEL_REQ_CLIENT17__NV3X = 0x00000025, - CHCG_PERF_SEL_REQ_CLIENT18__NV3X = 0x00000026, - CHCG_PERF_SEL_REQ_CLIENT19__NV3X = 0x00000027, - CHCG_PERF_SEL_REQ_CLIENT20__NV3X = 0x00000028, - CHCG_PERF_SEL_REQ_CLIENT21__NV3X = 0x00000029, - CHCG_PERF_SEL_REQ_CLIENT22__NV3X = 0x0000002a, - CHCG_PERF_SEL_REQ_CLIENT23__NV3X = 0x0000002b, -#endif -} CHCG_PERF_SEL; - -constexpr unsigned int MaxChcgPerfSelGfx101 = CHCG_PERF_SEL_REQ_CLIENT14__GFX101; -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxChcgPerfSelNv24 = CHCG_PERF_SEL_REQ_CLIENT19__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxChcgPerfSelNv23 = CHCG_PERF_SEL_REQ_CLIENT19__NV23; -#endif -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxChcgPerfSelNv22 = CHCG_PERF_SEL_REQ_CLIENT19__NV22; -#endif -#if CHIP_HDR_NAVI21 -constexpr unsigned int MaxChcgPerfSelNv21 = CHCG_PERF_SEL_REQ_CLIENT19__NV21; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -constexpr unsigned int MaxChcgPerfSelNv3x = CHCG_PERF_SEL_REQ_CLIENT23__NV3X; -#endif - -typedef enum CHC_PERF_SEL { - CHC_PERF_SEL_CYCLE = 0x00000000, - CHC_PERF_SEL_BUSY = 0x00000001, - CHC_PERF_SEL_STARVE = 0x00000002, - CHC_PERF_SEL_ARB_RET_LEVEL = 0x00000003, - CHC_PERF_SEL_GL2_REQ_READ_LATENCY = 0x00000004, - CHC_PERF_SEL_GL2_REQ_WRITE_LATENCY = 0x00000005, - CHC_PERF_SEL_REQ = 0x00000006, - CHC_PERF_SEL_REQ_ATOMIC_WITH_RET = 0x00000007, - CHC_PERF_SEL_REQ_ATOMIC_WITHOUT_RET = 0x00000008, - CHC_PERF_SEL_REQ_NOP_ACK = 0x00000009, - CHC_PERF_SEL_REQ_NOP_RTN0 = 0x0000000a, - CHC_PERF_SEL_REQ_READ = 0x0000000b, - CHC_PERF_SEL_REQ_READ_128B = 0x0000000c, - CHC_PERF_SEL_REQ_READ_32B = 0x0000000d, - CHC_PERF_SEL_REQ_READ_64B = 0x0000000e, - CHC_PERF_SEL_REQ_WRITE = 0x0000000f, - CHC_PERF_SEL_REQ_WRITE_32B = 0x00000010, - CHC_PERF_SEL_REQ_WRITE_64B = 0x00000011, - CHC_PERF_SEL_STALL_GL2_GL1 = 0x00000012, - CHC_PERF_SEL_STALL_BUFFER_FULL = 
0x00000013, - CHC_PERF_SEL_REQ_CLIENT0 = 0x00000014, - CHC_PERF_SEL_REQ_CLIENT1 = 0x00000015, - CHC_PERF_SEL_REQ_CLIENT2 = 0x00000016, - CHC_PERF_SEL_REQ_CLIENT3 = 0x00000017, - CHC_PERF_SEL_REQ_CLIENT4 = 0x00000018, - CHC_PERF_SEL_REQ_CLIENT5 = 0x00000019, - CHC_PERF_SEL_REQ_CLIENT6 = 0x0000001a, - CHC_PERF_SEL_REQ_CLIENT7 = 0x0000001b, - CHC_PERF_SEL_REQ_CLIENT8 = 0x0000001c, - CHC_PERF_SEL_REQ_CLIENT9 = 0x0000001d, - CHC_PERF_SEL_REQ_CLIENT10 = 0x0000001e, - CHC_PERF_SEL_REQ_CLIENT11 = 0x0000001f, - CHC_PERF_SEL_REQ_CLIENT12 = 0x00000020, - CHC_PERF_SEL_REQ_CLIENT13 = 0x00000021, - CHC_PERF_SEL_REQ_CLIENT14 = 0x00000022, - CHC_PERF_SEL_REQ_CLIENT15__GFX103PLUSEXCLUSIVE = 0x00000023, - CHC_PERF_SEL_REQ_CLIENT16__GFX103PLUSEXCLUSIVE = 0x00000024, - CHC_PERF_SEL_REQ_CLIENT17__GFX103PLUSEXCLUSIVE = 0x00000025, - CHC_PERF_SEL_REQ_CLIENT18__GFX103PLUSEXCLUSIVE = 0x00000026, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE = 0x00000027, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CHC_PERF_SEL_REQ_CLIENT20__GFX11 = 0x00000028, - CHC_PERF_SEL_REQ_CLIENT21__GFX11 = 0x00000029, - CHC_PERF_SEL_REQ_CLIENT22__GFX11 = 0x0000002a, - CHC_PERF_SEL_REQ_CLIENT23__GFX11 = 0x0000002b, -#endif -} CHC_PERF_SEL; - -constexpr unsigned int MaxChcPerfSelGfx101 = CHC_PERF_SEL_REQ_CLIENT14; -constexpr unsigned int MaxChcPerfSelGfx103Derivative = CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxChcPerfSelGfx11 = CHC_PERF_SEL_REQ_CLIENT23__GFX11; -#endif - -typedef enum CmaskAddr { - CMASK_ADDR_TILED = 0x00000000, - CMASK_ADDR_LINEAR = 0x00000001, - CMASK_ADDR_COMPATIBLE = 0x00000002, -} CmaskAddr; - -typedef enum CmaskCode { - CMASK_CLR00_F0 = 0x00000000, - CMASK_CLR00_F1 = 0x00000001, - CMASK_CLR00_F2 = 0x00000002, - CMASK_CLR00_FX = 0x00000003, - CMASK_CLR01_F0 = 0x00000004, - CMASK_CLR01_F1 = 0x00000005, - CMASK_CLR01_F2 = 0x00000006, - CMASK_CLR01_FX = 
0x00000007, - CMASK_CLR10_F0 = 0x00000008, - CMASK_CLR10_F1 = 0x00000009, - CMASK_CLR10_F2 = 0x0000000a, - CMASK_CLR10_FX = 0x0000000b, - CMASK_CLR11_F0 = 0x0000000c, - CMASK_CLR11_F1 = 0x0000000d, - CMASK_CLR11_F2 = 0x0000000e, - CMASK_CLR11_FX = 0x0000000f, -} CmaskCode; - -typedef enum CmaskMode { - CMASK_CLEAR_NONE = 0x00000000, - CMASK_CLEAR_ONE = 0x00000001, - CMASK_CLEAR_ALL = 0x00000002, - CMASK_ANY_EXPANDED = 0x00000003, - CMASK_ALPHA0_FRAG1 = 0x00000004, - CMASK_ALPHA0_FRAG2 = 0x00000005, - CMASK_ALPHA0_FRAG4 = 0x00000006, - CMASK_ALPHA0_FRAGS = 0x00000007, - CMASK_ALPHA1_FRAG1 = 0x00000008, - CMASK_ALPHA1_FRAG2 = 0x00000009, - CMASK_ALPHA1_FRAG4 = 0x0000000a, - CMASK_ALPHA1_FRAGS = 0x0000000b, - CMASK_ALPHAX_FRAG1 = 0x0000000c, - CMASK_ALPHAX_FRAG2 = 0x0000000d, - CMASK_ALPHAX_FRAG4 = 0x0000000e, - CMASK_ALPHAX_FRAGS = 0x0000000f, -} CmaskMode; - -typedef enum ColorArray { - ARRAY_2D_ALT_COLOR = 0x00000000, - ARRAY_2D_COLOR = 0x00000001, - ARRAY_3D_SLICE_COLOR = 0x00000003, -} ColorArray; - -typedef enum ColorFormat { - COLOR_INVALID = 0x00000000, - COLOR_8 = 0x00000001, - COLOR_16 = 0x00000002, - COLOR_8_8 = 0x00000003, - COLOR_32 = 0x00000004, - COLOR_16_16 = 0x00000005, - COLOR_10_11_11 = 0x00000006, - COLOR_11_11_10 = 0x00000007, - COLOR_10_10_10_2 = 0x00000008, - COLOR_2_10_10_10 = 0x00000009, - COLOR_8_8_8_8 = 0x0000000a, - COLOR_32_32 = 0x0000000b, - COLOR_16_16_16_16 = 0x0000000c, - COLOR_RESERVED_13 = 0x0000000d, - COLOR_32_32_32_32 = 0x0000000e, - COLOR_RESERVED_15 = 0x0000000f, - COLOR_5_6_5 = 0x00000010, - COLOR_1_5_5_5 = 0x00000011, - COLOR_5_5_5_1 = 0x00000012, - COLOR_4_4_4_4 = 0x00000013, - COLOR_8_24 = 0x00000014, - COLOR_24_8 = 0x00000015, - COLOR_X24_8_32_FLOAT = 0x00000016, - COLOR_RESERVED_23 = 0x00000017, - COLOR_RESERVED_25 = 0x00000019, - COLOR_RESERVED_26 = 0x0000001a, - COLOR_RESERVED_27 = 0x0000001b, - COLOR_RESERVED_28 = 0x0000001c, - COLOR_RESERVED_29 = 0x0000001d, - COLOR_2_10_10_10_6E4 = 0x0000001f, - 
COLOR_RESERVED_24__GFX09 = 0x00000018, - COLOR_RESERVED_30__GFX09 = 0x0000001e, - COLOR_RESERVED_24__GFX101 = 0x00000018, - COLOR_RESERVED_30__GFX101 = 0x0000001e, - COLOR_2_10_10_10_7E3__GFX103COREPLUS = 0x0000001e, - COLOR_5_9_9_9__GFX103PLUSEXCLUSIVE = 0x00000018, -} ColorFormat; - -typedef enum ColorTransform { - DCC_CT_AUTO = 0x00000000, - DCC_CT_NONE = 0x00000001, - ABGR_TO_A_BG_G_RB = 0x00000002, - BGRA_TO_BG_G_RB_A = 0x00000003, -} ColorTransform; - -typedef enum CombFunc { - COMB_DST_PLUS_SRC = 0x00000000, - COMB_SRC_MINUS_DST = 0x00000001, - COMB_MIN_DST_SRC = 0x00000002, - COMB_MAX_DST_SRC = 0x00000003, - COMB_DST_MINUS_SRC = 0x00000004, -} CombFunc; - -typedef enum CompareFrag { - FRAG_NEVER = 0x00000000, - FRAG_LESS = 0x00000001, - FRAG_EQUAL = 0x00000002, - FRAG_LEQUAL = 0x00000003, - FRAG_GREATER = 0x00000004, - FRAG_NOTEQUAL = 0x00000005, - FRAG_GEQUAL = 0x00000006, - FRAG_ALWAYS = 0x00000007, -} CompareFrag; - -typedef enum CompareRef { - REF_NEVER = 0x00000000, - REF_LESS = 0x00000001, - REF_EQUAL = 0x00000002, - REF_LEQUAL = 0x00000003, - REF_GREATER = 0x00000004, - REF_NOTEQUAL = 0x00000005, - REF_GEQUAL = 0x00000006, - REF_ALWAYS = 0x00000007, -} CompareRef; - -typedef enum ConservativeZExport { - EXPORT_ANY_Z = 0x00000000, - EXPORT_LESS_THAN_Z = 0x00000001, - EXPORT_GREATER_THAN_Z = 0x00000002, - EXPORT_RESERVED = 0x00000003, -} ConservativeZExport; - -typedef enum CovToShaderSel { - INPUT_COVERAGE = 0x00000000, - INPUT_INNER_COVERAGE = 0x00000001, - INPUT_DEPTH_COVERAGE = 0x00000002, - RAW = 0x00000003, -} CovToShaderSel; - -typedef enum CPC_LATENCY_STATS_SEL { - CPC_LATENCY_STATS_SEL_XACK_MAX = 0x00000000, - CPC_LATENCY_STATS_SEL_XACK_MIN = 0x00000001, - CPC_LATENCY_STATS_SEL_XACK_LAST = 0x00000002, - CPC_LATENCY_STATS_SEL_XNACK_MAX = 0x00000003, - CPC_LATENCY_STATS_SEL_XNACK_MIN = 0x00000004, - CPC_LATENCY_STATS_SEL_XNACK_LAST = 0x00000005, - CPC_LATENCY_STATS_SEL_INVAL_MAX__GFX10PLUS = 0x00000006, - 
CPC_LATENCY_STATS_SEL_INVAL_MIN__GFX10PLUS = 0x00000007, - CPC_LATENCY_STATS_SEL_INVAL_LAST__GFX10PLUS = 0x00000008, -} CPC_LATENCY_STATS_SEL; - -typedef enum CPC_PERFCOUNT_SEL { - CPC_PERF_SEL_ALWAYS_COUNT = 0x00000000, - CPC_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x00000001, - CPC_PERF_SEL_RCIU_STALL_PRIV_VIOLATION = 0x00000002, - CPC_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x00000005, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY = 0x00000006, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY_PERF = 0x00000007, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READ = 0x00000008, - CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ__CORE = 0x0000000b, - CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ_PERF__CORE = 0x0000000c, - CPC_PERF_SEL_ME1_BUSY_FOR_PACKET_DECODE__CORE = 0x0000000d, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY__CORE = 0x0000000e, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY_PERF__CORE = 0x0000000f, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READ__CORE = 0x00000010, - CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ__CORE = 0x00000013, - CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ_PERF__CORE = 0x00000014, - CPC_PERF_SEL_ME2_BUSY_FOR_PACKET_DECODE__CORE = 0x00000015, - CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE__CORE = 0x00000016, - CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS__CORE = 0x00000017, - CPC_PERF_SEL_UTCL1_STALL_ON_TRANSLATION__CORE = 0x00000018, - CPC_PERF_SEL_CPC_STAT_BUSY__CORE = 0x00000019, - CPC_PERF_SEL_CPC_STAT_IDLE__CORE = 0x0000001a, - CPC_PERF_SEL_CPC_STAT_STALL__CORE = 0x0000001b, - CPC_PERF_SEL_CPC_TCIU_BUSY__CORE = 0x0000001c, - CPC_PERF_SEL_CPC_TCIU_IDLE__CORE = 0x0000001d, - CPC_PERF_SEL_CPC_UTCL2IU_BUSY__CORE = 0x0000001e, - CPC_PERF_SEL_CPC_UTCL2IU_IDLE__CORE = 0x0000001f, - CPC_PERF_SEL_CPC_UTCL2IU_STALL__CORE = 0x00000020, - CPC_PERF_SEL_ME1_DC0_SPI_BUSY__CORE = 0x00000021, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE = 0x00000022, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_READ__GFX09 = 0x00000009, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_WRITE__GFX09 = 0x0000000a, - 
CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_READ__GFX09 = 0x00000011, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_WRITE__GFX09 = 0x00000012, - CPC_PERF_SEL_MIU_STALL_ON_RDREQ_FREE__GFX09_10 = 0x00000003, - CPC_PERF_SEL_MIU_STALL_ON_WRREQ_FREE__GFX09_10 = 0x00000004, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_GUS_READ__GFX10CORE = 0x00000009, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_GUS_WRITE__GFX10CORE = 0x0000000a, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_GUS_READ__GFX10CORE = 0x00000011, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_GUS_WRITE__GFX10CORE = 0x00000012, - CPC_PERF_SEL_CPC_GCRIU_BUSY__GFX10COREPLUS = 0x00000023, - CPC_PERF_SEL_CPC_GCRIU_IDLE__GFX10COREPLUS = 0x00000024, - CPC_PERF_SEL_CPC_GCRIU_STALL__GFX10COREPLUS = 0x00000025, - CPC_PERF_SEL_GCRIU_STALL_WAIT_ON_FREE__GFX10COREPLUS = 0x00000026, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_TCIU_READ__GFX10COREPLUS = 0x00000027, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_TCIU_READ__GFX10COREPLUS = 0x00000028, - CPC_PERF_SEL_CPC_UTCL2IU_XACK__GFX10COREPLUS = 0x00000029, - CPC_PERF_SEL_CPC_UTCL2IU_XNACK__GFX10COREPLUS = 0x0000002a, - CPC_PERF_SEL_MEC_INSTR_CACHE_HIT__GFX10COREPLUS = 0x0000002b, - CPC_PERF_SEL_MEC_INSTR_CACHE_MISS__GFX10COREPLUS = 0x0000002c, - CPC_PERF_SEL_MES_THREAD0__GFX10COREPLUS = 0x0000002d, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS = 0x0000002e, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CPC_PERF_SEL_ME1_STALL_WAIT_ON_MEM_READ__GFX11 = 0x00000009, - CPC_PERF_SEL_ME1_STALL_WAIT_ON_MEM_WRITE__GFX11 = 0x0000000a, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_MEM_READ__GFX11 = 0x00000011, - CPC_PERF_SEL_ME2_STALL_WAIT_ON_MEM_WRITE__GFX11 = 0x00000012, - CPC_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS__GFX11 = 0x0000002f, - CPC_PERF_SEL_TCIU_WRITE_REQUEST_SENT__GFX11 = 0x00000030, - CPC_PERF_SEL_TCIU_READ_REQUEST_SENT__GFX11 = 0x00000031, - CPC_PERF_SEL_GUS_WRITE_REQUEST_SENT__GFX11 = 0x00000032, - CPC_PERF_SEL_GUS_READ_REQUEST_SENT__GFX11 = 0x00000033, - CPC_PERF_SEL_MEC_THREAD0__GFX11 = 0x00000034, - CPC_PERF_SEL_MEC_THREAD1__GFX11 = 0x00000035, 
- CPC_PERF_SEL_MEC_THREAD2__GFX11 = 0x00000036, - CPC_PERF_SEL_MEC_THREAD3__GFX11 = 0x00000037, -#endif -} CPC_PERFCOUNT_SEL; - -constexpr unsigned int MaxCpcPerfcountSelGfx09 = CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE; -constexpr unsigned int MaxCpcPerfcountSelGfx10Core = CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxCpcPerfcountSelGfx11 = CPC_PERF_SEL_MEC_THREAD3__GFX11; -#endif - -typedef enum CPF_LATENCY_STATS_SEL { - CPF_LATENCY_STATS_SEL_XACK_MAX = 0x00000000, - CPF_LATENCY_STATS_SEL_XACK_MIN = 0x00000001, - CPF_LATENCY_STATS_SEL_XACK_LAST = 0x00000002, - CPF_LATENCY_STATS_SEL_XNACK_MAX = 0x00000003, - CPF_LATENCY_STATS_SEL_XNACK_MIN = 0x00000004, - CPF_LATENCY_STATS_SEL_XNACK_LAST = 0x00000005, - CPF_LATENCY_STATS_SEL_READ_MAX = 0x00000006, - CPF_LATENCY_STATS_SEL_READ_MIN = 0x00000007, - CPF_LATENCY_STATS_SEL_READ_LAST = 0x00000008, - CPF_LATENCY_STATS_SEL_INVAL_MAX__GFX10PLUS = 0x00000009, - CPF_LATENCY_STATS_SEL_INVAL_MIN__GFX10PLUS = 0x0000000a, - CPF_LATENCY_STATS_SEL_INVAL_LAST__GFX10PLUS = 0x0000000b, -} CPF_LATENCY_STATS_SEL; - -typedef enum CPF_PERFCOUNTWINDOW_SEL { - CPF_PERFWINDOW_SEL_CSF = 0x00000000, - CPF_PERFWINDOW_SEL_HQD1 = 0x00000001, - CPF_PERFWINDOW_SEL_HQD2 = 0x00000002, - CPF_PERFWINDOW_SEL_RDMA = 0x00000003, - CPF_PERFWINDOW_SEL_RWPP = 0x00000004, -} CPF_PERFCOUNTWINDOW_SEL; - -typedef enum CPF_PERFCOUNT_SEL { - CPF_PERF_SEL_ALWAYS_COUNT = 0x00000000, - CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_FREE = 0x00000002, - CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_TAGS = 0x00000003, - CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_RING = 0x00000004, - CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB1 = 0x00000005, - CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB2 = 0x00000006, - CPF_PERF_SEL_CSF_STATE_FIFO_NOT_RTR = 0x0000000a, - CPF_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x0000000b, - CPF_PERF_SEL_GRBM_DWORDS_SENT = 0x0000000c, - CPF_PERF_SEL_DYNAMIC_CLOCK_VALID = 0x0000000d, - 
CPF_PERF_SEL_REGISTER_CLOCK_VALID = 0x0000000e, - CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE__CORE = 0x00000011, - CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS__CORE = 0x00000012, - CPF_PERF_SEL_GFX_UTCL1_STALL_ON_TRANSLATION__CORE = 0x00000013, - CPF_PERF_SEL_CMP_UTCL1_STALL_ON_TRANSLATION__CORE = 0x00000014, - CPF_PERF_SEL_RCIU_STALL_WAIT_ON_FREE__CORE = 0x00000015, - CPF_PERF_SEL_CSF_BUSY_FOR_FECTHINC_STATE__GFX09 = 0x00000007, - CPF_PERF_SEL_MIU_WRITE_REQUEST_SEND__GFX09 = 0x0000000f, - CPF_PERF_SEL_MIU_READ_REQUEST_SEND__GFX09 = 0x00000010, - CPF_PERF_SEL_TCIU_READ_REQUEST_SENT__GFX09 = 0x00000016, - CPF_PERF_SEL_CPF_STAT_BUSY__GFX09 = 0x00000017, - CPF_PERF_SEL_CPF_STAT_IDLE__GFX09 = 0x00000018, - CPF_PERF_SEL_CPF_STAT_STALL__GFX09 = 0x00000019, - CPF_PERF_SEL_CPF_TCIU_BUSY__GFX09 = 0x0000001a, - CPF_PERF_SEL_CPF_TCIU_IDLE__GFX09 = 0x0000001b, - CPF_PERF_SEL_CPF_TCIU_STALL__GFX09 = 0x0000001c, - CPF_PERF_SEL_CPF_UTCL2IU_BUSY__GFX09 = 0x0000001d, - CPF_PERF_SEL_CPF_UTCL2IU_IDLE__GFX09 = 0x0000001e, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09 = 0x0000001f, - CPF_PERF_SEL_MIU_STALLED_WAITING_RDREQ_FREE__GFX09_10 = 0x00000001, - CPF_PERF_SEL_MIU_BUSY_FOR_OUTSTANDING_TAGS__GFX09_10 = 0x00000008, - CPF_PERF_SEL_CSF_RTS_MIU_NOT_RTR__GFX09_10 = 0x00000009, - CPF_PERF_SEL_CSF_BUSY_FOR_FECTHINC_STATE__GFX101 = 0x00000007, - CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_STATE__GFX103PLUS = 0x00000007, - CPF_PERF_SEL_GUS_WRITE_REQUEST_SENT__GFX10COREPLUS = 0x0000000f, - CPF_PERF_SEL_GUS_READ_REQUEST_SENT__GFX10COREPLUS = 0x00000010, - CPF_PERF_SEL_TCIU_WRITE_REQUEST_SENT__GFX10COREPLUS = 0x00000016, - CPF_PERF_SEL_TCIU_READ_REQUEST_SENT__GFX10COREPLUS = 0x00000017, - CPF_PERF_SEL_CPF_STAT_BUSY__GFX10COREPLUS = 0x00000018, - CPF_PERF_SEL_CPF_STAT_IDLE__GFX10COREPLUS = 0x00000019, - CPF_PERF_SEL_CPF_STAT_STALL__GFX10COREPLUS = 0x0000001a, - CPF_PERF_SEL_CPF_TCIU_BUSY__GFX10COREPLUS = 0x0000001b, - CPF_PERF_SEL_CPF_TCIU_IDLE__GFX10COREPLUS = 0x0000001c, - 
CPF_PERF_SEL_CPF_TCIU_STALL__GFX10COREPLUS = 0x0000001d, - CPF_PERF_SEL_CPF_UTCL2IU_BUSY__GFX10COREPLUS = 0x0000001e, - CPF_PERF_SEL_CPF_UTCL2IU_IDLE__GFX10COREPLUS = 0x0000001f, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX10COREPLUS = 0x00000020, - CPF_PERF_SEL_CPF_GCRIU_BUSY__GFX10COREPLUS = 0x00000021, - CPF_PERF_SEL_CPF_GCRIU_IDLE__GFX10COREPLUS = 0x00000022, - CPF_PERF_SEL_CPF_GCRIU_STALL__GFX10COREPLUS = 0x00000023, - CPF_PERF_SEL_GCRIU_STALL_WAIT_ON_FREE__GFX10COREPLUS = 0x00000024, - CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_DB__GFX10COREPLUS = 0x00000025, - CPF_PERF_SEL_CPF_UTCL2IU_XACK__GFX10COREPLUS = 0x00000026, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS = 0x00000027, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CPF_PERF_SEL_CP_SDMA_MNGR_DMA_REQ__GFX11 = 0x00000028, - CPF_PERF_SEL_CP_SDMA_MNGR_DMA_DONE__GFX11 = 0x00000029, - CPF_PERF_SEL_CP_SDMA_MNGR_LATENCY__GFX11 = 0x0000002a, - CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11 = 0x0000002b, -#endif -} CPF_PERFCOUNT_SEL; - -constexpr unsigned int MaxCpfPerfcountSelGfx09 = CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09; -constexpr unsigned int MaxCpfPerfcountSelGfx10Core = CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxCpfPerfcountSelGfx11 = CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11; -#endif - -typedef enum CPF_SCRATCH_REG_ATOMIC_OP { - CPF_SCRATCH_REG_ATOMIC_ADD = 0x00000000, - CPF_SCRATCH_REG_ATOMIC_SUB = 0x00000001, - CPF_SCRATCH_REG_ATOMIC_OR = 0x00000002, - CPF_SCRATCH_REG_ATOMIC_AND = 0x00000003, - CPF_SCRATCH_REG_ATOMIC_NOT = 0x00000004, - CPF_SCRATCH_REG_ATOMIC_MIN = 0x00000005, - CPF_SCRATCH_REG_ATOMIC_MAX = 0x00000006, - CPF_SCRATCH_REG_ATOMIC_CMPSWAP = 0x00000007, -} CPF_SCRATCH_REG_ATOMIC_OP; - -typedef enum CPG_LATENCY_STATS_SEL { - CPG_LATENCY_STATS_SEL_XACK_MAX = 0x00000000, - CPG_LATENCY_STATS_SEL_XACK_MIN = 0x00000001, - CPG_LATENCY_STATS_SEL_XACK_LAST = 0x00000002, - 
CPG_LATENCY_STATS_SEL_XNACK_MAX = 0x00000003, - CPG_LATENCY_STATS_SEL_XNACK_MIN = 0x00000004, - CPG_LATENCY_STATS_SEL_XNACK_LAST = 0x00000005, - CPG_LATENCY_STATS_SEL_WRITE_MAX = 0x00000006, - CPG_LATENCY_STATS_SEL_WRITE_MIN = 0x00000007, - CPG_LATENCY_STATS_SEL_WRITE_LAST = 0x00000008, - CPG_LATENCY_STATS_SEL_READ_MAX = 0x00000009, - CPG_LATENCY_STATS_SEL_READ_MIN = 0x0000000a, - CPG_LATENCY_STATS_SEL_READ_LAST = 0x0000000b, - CPG_LATENCY_STATS_SEL_INVAL_MAX__GFX09 = 0x0000000c, - CPG_LATENCY_STATS_SEL_INVAL_MIN__GFX09 = 0x0000000d, - CPG_LATENCY_STATS_SEL_INVAL_LAST__GFX09 = 0x0000000e, - CPG_LATENCY_STATS_SEL_ATOMIC_MAX__GFX09 = 0x0000000f, - CPG_LATENCY_STATS_SEL_ATOMIC_MIN__GFX09 = 0x00000010, - CPG_LATENCY_STATS_SEL_ATOMIC_LAST__GFX09 = 0x00000011, - CPG_LATENCY_STATS_SEL_ATOMIC_MAX__GFX10PLUS = 0x0000000c, - CPG_LATENCY_STATS_SEL_ATOMIC_MIN__GFX10PLUS = 0x0000000d, - CPG_LATENCY_STATS_SEL_ATOMIC_LAST__GFX10PLUS = 0x0000000e, - CPG_LATENCY_STATS_SEL_INVAL_MAX__GFX10PLUS = 0x0000000f, - CPG_LATENCY_STATS_SEL_INVAL_MIN__GFX10PLUS = 0x00000010, - CPG_LATENCY_STATS_SEL_INVAL_LAST__GFX10PLUS = 0x00000011, -} CPG_LATENCY_STATS_SEL; - -typedef enum CPG_PERFCOUNTWINDOW_SEL { - CPG_PERFWINDOW_SEL_PFP = 0x00000000, - CPG_PERFWINDOW_SEL_ME = 0x00000001, - CPG_PERFWINDOW_SEL_CE = 0x00000002, - CPG_PERFWINDOW_SEL_MEC1 = 0x00000004, - CPG_PERFWINDOW_SEL_MEC2 = 0x00000005, - CPG_PERFWINDOW_SEL_DFY = 0x00000006, - CPG_PERFWINDOW_SEL_DMA = 0x00000007, - CPG_PERFWINDOW_SEL_SHADOW = 0x00000008, - CPG_PERFWINDOW_SEL_RB = 0x00000009, - CPG_PERFWINDOW_SEL_CEDMA = 0x0000000a, - CPG_PERFWINDOW_SEL_PRT_HDR_RPTR = 0x0000000b, - CPG_PERFWINDOW_SEL_PRT_SMP_RPTR = 0x0000000c, - CPG_PERFWINDOW_SEL_PQ1 = 0x0000000d, - CPG_PERFWINDOW_SEL_PQ2 = 0x0000000e, - CPG_PERFWINDOW_SEL_RESERVED1__GFX09 = 0x00000003, - CPG_PERFWINDOW_SEL_MEMWR__GFX09 = 0x0000000f, - CPG_PERFWINDOW_SEL_MEMRD__GFX09 = 0x00000010, - CPG_PERFWINDOW_SEL_VGT0__GFX09 = 0x00000011, - CPG_PERFWINDOW_SEL_VGT1__GFX09 = 
0x00000012, - CPG_PERFWINDOW_SEL_APPEND__GFX09 = 0x00000013, - CPG_PERFWINDOW_SEL_QURD__GFX09 = 0x00000014, - CPG_PERFWINDOW_SEL_CPCQU__GFX09 = 0x00000015, - CPG_PERFWINDOW_SEL_SR__GFX09 = 0x00000016, - CPG_PERFWINDOW_SEL_QU_EOP__GFX09 = 0x00000017, - CPG_PERFWINDOW_SEL_QU_STRM__GFX09 = 0x00000018, - CPG_PERFWINDOW_SEL_QU_PIPE__GFX09 = 0x00000019, - CPG_PERFWINDOW_SEL_RESERVED2__GFX09 = 0x0000001a, - CPG_PERFWINDOW_SEL_CPC_IC__GFX09 = 0x0000001b, - CPG_PERFWINDOW_SEL_SD__GFX09 = 0x0000001c, - CPG_PERFWINDOW_SEL_MES__GFX10PLUS = 0x00000003, - CPG_PERFWINDOW_SEL_PQ3__GFX10PLUS = 0x0000000f, - CPG_PERFWINDOW_SEL_MEMWR__GFX10PLUS = 0x00000010, - CPG_PERFWINDOW_SEL_MEMRD__GFX10PLUS = 0x00000011, - CPG_PERFWINDOW_SEL_VGT0__GFX10PLUS = 0x00000012, - CPG_PERFWINDOW_SEL_VGT1__GFX10PLUS = 0x00000013, - CPG_PERFWINDOW_SEL_APPEND__GFX10PLUS = 0x00000014, - CPG_PERFWINDOW_SEL_QURD__GFX10PLUS = 0x00000015, - CPG_PERFWINDOW_SEL_DDID__GFX10PLUS = 0x00000016, - CPG_PERFWINDOW_SEL_SR__GFX10PLUS = 0x00000017, - CPG_PERFWINDOW_SEL_QU_EOP__GFX10PLUS = 0x00000018, - CPG_PERFWINDOW_SEL_QU_STRM__GFX10PLUS = 0x00000019, - CPG_PERFWINDOW_SEL_QU_PIPE__GFX10PLUS = 0x0000001a, - CPG_PERFWINDOW_SEL_RESERVED1__GFX10PLUS = 0x0000001b, - CPG_PERFWINDOW_SEL_CPC_IC__GFX10PLUS = 0x0000001c, - CPG_PERFWINDOW_SEL_RESERVED2__GFX10PLUS = 0x0000001d, - CPG_PERFWINDOW_SEL_CPG_IC__GFX10PLUS = 0x0000001e, -} CPG_PERFCOUNTWINDOW_SEL; - -typedef enum CPG_PERFCOUNT_SEL { - CPG_PERF_SEL_ALWAYS_COUNT = 0x00000000, - CPG_PERF_SEL_RBIU_FIFO_FULL = 0x00000001, - CPG_PERF_SEL_CP_GRBM_DWORDS_SENT = 0x00000004, - CPG_PERF_SEL_ME_PARSER_BUSY = 0x00000005, - CPG_PERF_SEL_COUNT_TYPE0_PACKETS = 0x00000006, - CPG_PERF_SEL_COUNT_TYPE3_PACKETS = 0x00000007, - CPG_PERF_SEL_CP_GRBM_OUT_OF_CREDITS = 0x00000009, - CPG_PERF_SEL_CP_PFP_GRBM_OUT_OF_CREDITS = 0x0000000a, - CPG_PERF_SEL_CP_GDS_GRBM_OUT_OF_CREDITS = 0x0000000b, - CPG_PERF_SEL_RCIU_STALLED_ON_ME_READ = 0x0000000c, - CPG_PERF_SEL_RCIU_STALLED_ON_DMA_READ = 0x0000000d, - 
CPG_PERF_SEL_SSU_STALLED_ON_ACTIVE_CNTX = 0x0000000e, - CPG_PERF_SEL_CSF_RTS_BUT_MIU_NOT_RTR__GFX09_10 = 0x00000002, - CPG_PERF_SEL_CSF_ST_BASE_SIZE_FIFO_FULL__GFX09_10 = 0x00000003, - CPG_PERF_SEL_CSF_FETCHING_CMD_BUFFERS__GFX09_10 = 0x00000008, - CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS__GFX10 = 0x0000001e, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU__GFX10CORE = 0x00000028, - CPG_PERF_SEL_GUS_WRITE_REQUEST_SENT__GFX10COREPLUS = 0x00000022, - CPG_PERF_SEL_GUS_READ_REQUEST_SENT__GFX10COREPLUS = 0x00000023, - CPG_PERF_SEL_CE_STALL_RAM_DUMP__GFX10COREPLUS = 0x00000024, - CPG_PERF_SEL_CE_STALL_RAM_WRITE__GFX10COREPLUS = 0x00000025, - CPG_PERF_SEL_CE_STALL_ON_INC_FIFO__GFX10COREPLUS = 0x00000026, - CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO__GFX10COREPLUS = 0x00000027, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ__GFX10COREPLUS = 0x00000029, - CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG__GFX10COREPLUS = 0x0000002a, - CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER__GFX10COREPLUS = 0x0000002b, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE__GFX10COREPLUS = 0x0000002c, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS__GFX10COREPLUS = 0x0000002d, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE__GFX10COREPLUS = 0x0000002e, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS__GFX10COREPLUS = 0x0000002f, - CPG_PERF_SEL_UTCL1_STALL_ON_TRANSLATION__GFX10COREPLUS = 0x00000030, - CPG_PERF_SEL_TCIU_WRITE_REQUEST_SENT__GFX10COREPLUS = 0x00000031, - CPG_PERF_SEL_TCIU_READ_REQUEST_SENT__GFX10COREPLUS = 0x00000032, - CPG_PERF_SEL_CPG_STAT_BUSY__GFX10COREPLUS = 0x00000033, - CPG_PERF_SEL_CPG_STAT_IDLE__GFX10COREPLUS = 0x00000034, - CPG_PERF_SEL_CPG_STAT_STALL__GFX10COREPLUS = 0x00000035, - CPG_PERF_SEL_CPG_TCIU_BUSY__GFX10COREPLUS = 0x00000036, - CPG_PERF_SEL_CPG_TCIU_IDLE__GFX10COREPLUS = 0x00000037, - CPG_PERF_SEL_CPG_TCIU_STALL__GFX10COREPLUS = 0x00000038, - CPG_PERF_SEL_CPG_UTCL2IU_BUSY__GFX10COREPLUS = 0x00000039, - CPG_PERF_SEL_CPG_UTCL2IU_IDLE__GFX10COREPLUS = 0x0000003a, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__GFX10COREPLUS = 0x0000003b, - 
CPG_PERF_SEL_CPG_GCRIU_BUSY__GFX10COREPLUS = 0x0000003c, - CPG_PERF_SEL_CPG_GCRIU_IDLE__GFX10COREPLUS = 0x0000003d, - CPG_PERF_SEL_CPG_GCRIU_STALL__GFX10COREPLUS = 0x0000003e, - CPG_PERF_SEL_GCRIU_STALL_WAIT_ON_FREE__GFX10COREPLUS = 0x0000003f, - CPG_PERF_SEL_ALL_GFX_PIPES_BUSY__GFX10COREPLUS = 0x00000040, - CPG_PERF_SEL_CPG_UTCL2IU_XACK__GFX10COREPLUS = 0x00000041, - CPG_PERF_SEL_CPG_UTCL2IU_XNACK__GFX10COREPLUS = 0x00000042, - CPG_PERF_SEL_PFP_STALLED_ON_MEQ_DDID_READY__GFX10COREPLUS = 0x00000043, - CPG_PERF_SEL_PFP_INSTR_CACHE_HIT__GFX10COREPLUS = 0x00000044, - CPG_PERF_SEL_PFP_INSTR_CACHE_MISS__GFX10COREPLUS = 0x00000045, - CPG_PERF_SEL_CE_INSTR_CACHE_HIT__GFX10COREPLUS = 0x00000046, - CPG_PERF_SEL_CE_INSTR_CACHE_MISS__GFX10COREPLUS = 0x00000047, - CPG_PERF_SEL_ME_INSTR_CACHE_HIT__GFX10COREPLUS = 0x00000048, - CPG_PERF_SEL_ME_INSTR_CACHE_MISS__GFX10COREPLUS = 0x00000049, - CPG_PERF_SEL_PFP_PACKET_FILTER_HIT_IB1__GFX10COREPLUS = 0x0000004a, - CPG_PERF_SEL_PFP_PACKET_FILTER_MISS_IB1__GFX10COREPLUS = 0x0000004b, - CPG_PERF_SEL_PFP_PACKET_FILTER_HIT_IB2__GFX10COREPLUS = 0x0000004c, - CPG_PERF_SEL_PFP_PACKET_FILTER_MISS_IB2__GFX10COREPLUS = 0x0000004d, - CPG_PERF_SEL_DMA_BUSY__GFX10COREPLUS = 0x0000004e, - CPG_PERF_SEL_DMA_STARVED__GFX10COREPLUS = 0x0000004f, - CPG_PERF_SEL_DMA_STALLED__GFX10COREPLUS = 0x00000050, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS = 0x00000051, - CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS__GFX10PLUS = 0x0000000f, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE__GFX10PLUS = 0x00000010, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM__GFX10PLUS = 0x00000011, - CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY__GFX10PLUS = 0x00000012, - CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY__GFX10PLUS = 0x00000013, - CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY__GFX10PLUS = 0x00000014, - CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ__GFX10PLUS = 0x00000015, - CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP__GFX10PLUS = 0x00000016, - 
CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ__GFX10PLUS = 0x00000017, - CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX__GFX10PLUS = 0x00000018, - CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU__GFX10PLUS = 0x00000019, - CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS__GFX10PLUS = 0x0000001a, - CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH__GFX10PLUS = 0x0000001b, - CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER__GFX10PLUS = 0x0000001c, - CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER__GFX10PLUS = 0x0000001d, - CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY__GFX10PLUS = 0x0000001f, - CPG_PERF_SEL_DYNAMIC_CLK_VALID__GFX10PLUS = 0x00000020, - CPG_PERF_SEL_REGISTER_CLK_VALID__GFX10PLUS = 0x00000021, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - CPG_PERF_SEL_PFP_PWS_STALLED0__GFX11 = 0x00000052, - CPG_PERF_SEL_ME_PWS_STALLED0__GFX11 = 0x00000053, - CPG_PERF_SEL_PFP_VGTDMA_INDR_STRUCT_BYPASS0__GFX11 = 0x00000054, - CPG_PERF_SEL_PFP_VGTDMA_INDR_STRUCT_NOT_BYPASS0__GFX11 = 0x00000055, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL0__GFX11 = 0x00000056, - CPG_PERF_SEL_PFP_PWS_STALLED1__GFX11 = 0x00000057, - CPG_PERF_SEL_ME_PWS_STALLED1__GFX11 = 0x00000058, - CPG_PERF_SEL_PFP_VGTDMA_INDR_STRUCT_BYPASS1__GFX11 = 0x00000059, - CPG_PERF_SEL_PFP_VGTDMA_INDR_STRUCT_NOT_BYPASS1__GFX11 = 0x0000005a, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11 = 0x0000005b, -#endif - CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS__RV1X_RV2X = 0x0000000f, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE__RV1X_RV2X = 0x00000010, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM__RV1X_RV2X = 0x00000011, - CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY__RV1X_RV2X = 0x00000012, - CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY__RV1X_RV2X = 0x00000013, - CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY__RV1X_RV2X = 0x00000014, - CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ__RV1X_RV2X = 0x00000015, - CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP__RV1X_RV2X = 0x00000016, - CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ__RV1X_RV2X = 0x00000017, - 
CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX__RV1X_RV2X = 0x00000018, - CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU__RV1X_RV2X = 0x00000019, - CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS__RV1X_RV2X = 0x0000001a, - CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH__RV1X_RV2X = 0x0000001b, - CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER__RV1X_RV2X = 0x0000001c, - CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER__RV1X_RV2X = 0x0000001d, - CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS__RV1X_RV2X = 0x0000001e, - CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY__RV1X_RV2X = 0x0000001f, - CPG_PERF_SEL_DYNAMIC_CLK_VALID__RV1X_RV2X = 0x00000020, - CPG_PERF_SEL_REGISTER_CLK_VALID__RV1X_RV2X = 0x00000021, - CPG_PERF_SEL_MIU_WRITE_REQUEST_SENT__RV1X_RV2X = 0x00000022, - CPG_PERF_SEL_MIU_READ_REQUEST_SENT__RV1X_RV2X = 0x00000023, - CPG_PERF_SEL_CE_STALL_RAM_DUMP__RV1X_RV2X = 0x00000024, - CPG_PERF_SEL_CE_STALL_RAM_WRITE__RV1X_RV2X = 0x00000025, - CPG_PERF_SEL_CE_STALL_ON_INC_FIFO__RV1X_RV2X = 0x00000026, - CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO__RV1X_RV2X = 0x00000027, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU__RV1X_RV2X = 0x00000028, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ__RV1X_RV2X = 0x00000029, - CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG__RV1X_RV2X = 0x0000002a, - CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER__RV1X_RV2X = 0x0000002b, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE__RV1X_RV2X = 0x0000002c, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS__RV1X_RV2X = 0x0000002d, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE__RV1X_RV2X = 0x0000002e, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS__RV1X_RV2X = 0x0000002f, - CPG_PERF_SEL_UTCL1_STALL_ON_TRANSLATION__RV1X_RV2X = 0x00000030, - CPG_PERF_SEL_TCIU_WRITE_REQUEST_SENT__RV1X_RV2X = 0x00000031, - CPG_PERF_SEL_CPG_STAT_BUSY__RV1X_RV2X = 0x00000032, - CPG_PERF_SEL_CPG_STAT_IDLE__RV1X_RV2X = 0x00000033, - CPG_PERF_SEL_CPG_STAT_STALL__RV1X_RV2X = 0x00000034, - CPG_PERF_SEL_CPG_TCIU_BUSY__RV1X_RV2X = 0x00000035, - CPG_PERF_SEL_CPG_TCIU_IDLE__RV1X_RV2X = 0x00000036, - CPF_PERF_SEL_CPG_TCIU_STALL__RV1X_RV2X = 
0x00000037, - CPG_PERF_SEL_CPG_UTCL2IU_BUSY__RV1X_RV2X = 0x00000038, - CPG_PERF_SEL_CPG_UTCL2IU_IDLE__RV1X_RV2X = 0x00000039, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__RV1X_RV2X = 0x0000003a, - CPG_PERF_SEL_GFX9_GAP__VG10_VG12_VG20_RN = 0x0000000f, - CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS__VG10_VG12_VG20_RN = 0x00000010, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE__VG10_VG12_VG20_RN = 0x00000011, - CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM__VG10_VG12_VG20_RN = 0x00000012, - CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY__VG10_VG12_VG20_RN = 0x00000013, - CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY__VG10_VG12_VG20_RN = 0x00000014, - CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY__VG10_VG12_VG20_RN = 0x00000015, - CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ__VG10_VG12_VG20_RN = 0x00000016, - CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP__VG10_VG12_VG20_RN = 0x00000017, - CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ__VG10_VG12_VG20_RN = 0x00000018, - CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX__VG10_VG12_VG20_RN = 0x00000019, - CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU__VG10_VG12_VG20_RN = 0x0000001a, - CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS__VG10_VG12_VG20_RN = 0x0000001b, - CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH__VG10_VG12_VG20_RN = 0x0000001c, - CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER__VG10_VG12_VG20_RN = 0x0000001d, - CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER__VG10_VG12_VG20_RN = 0x0000001e, - CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS__VG10_VG12_VG20_RN = 0x0000001f, - CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY__VG10_VG12_VG20_RN = 0x00000020, - CPG_PERF_SEL_DYNAMIC_CLK_VALID__VG10_VG12_VG20_RN = 0x00000021, - CPG_PERF_SEL_REGISTER_CLK_VALID__VG10_VG12_VG20_RN = 0x00000022, - CPG_PERF_SEL_MIU_WRITE_REQUEST_SENT__VG10_VG12_VG20_RN = 0x00000023, - CPG_PERF_SEL_MIU_READ_REQUEST_SENT__VG10_VG12_VG20_RN = 0x00000024, - CPG_PERF_SEL_CE_STALL_RAM_DUMP__VG10_VG12_VG20_RN = 0x00000025, - CPG_PERF_SEL_CE_STALL_RAM_WRITE__VG10_VG12_VG20_RN = 0x00000026, - CPG_PERF_SEL_CE_STALL_ON_INC_FIFO__VG10_VG12_VG20_RN 
= 0x00000027, - CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO__VG10_VG12_VG20_RN = 0x00000028, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU__VG10_VG12_VG20_RN = 0x00000029, - CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ__VG10_VG12_VG20_RN = 0x0000002a, - CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG__VG10_VG12_VG20_RN = 0x0000002b, - CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER__VG10_VG12_VG20_RN = 0x0000002c, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE__VG10_VG12_VG20_RN = 0x0000002d, - CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS__VG10_VG12_VG20_RN = 0x0000002e, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE__VG10_VG12_VG20_RN = 0x0000002f, - CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS__VG10_VG12_VG20_RN = 0x00000030, - CPG_PERF_SEL_UTCL1_STALL_ON_TRANSLATION__VG10_VG12_VG20_RN = 0x00000031, - CPG_PERF_SEL_TCIU_WRITE_REQUEST_SENT__VG10_VG12_VG20_RN = 0x00000032, - CPG_PERF_SEL_CPG_STAT_BUSY__VG10_VG12_VG20_RN = 0x00000033, - CPG_PERF_SEL_CPG_STAT_IDLE__VG10_VG12_VG20_RN = 0x00000034, - CPG_PERF_SEL_CPG_STAT_STALL__VG10_VG12_VG20_RN = 0x00000035, - CPG_PERF_SEL_CPG_TCIU_BUSY__VG10_VG12_VG20_RN = 0x00000036, - CPG_PERF_SEL_CPG_TCIU_IDLE__VG10_VG12_VG20_RN = 0x00000037, - CPF_PERF_SEL_CPG_TCIU_STALL__VG10_VG12_VG20_RN = 0x00000038, - CPG_PERF_SEL_CPG_UTCL2IU_BUSY__VG10_VG12_VG20_RN = 0x00000039, - CPG_PERF_SEL_CPG_UTCL2IU_IDLE__VG10_VG12_VG20_RN = 0x0000003a, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN = 0x0000003b, -} CPG_PERFCOUNT_SEL; - -constexpr unsigned int MaxCpgPerfcountSelVg10_Vg12_Vg20_Rn = CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN; -constexpr unsigned int MaxCpgPerfcountSelRv1x_Rv2x = CPG_PERF_SEL_CPG_UTCL2IU_STALL__RV1X_RV2X; -constexpr unsigned int MaxCpgPerfcountSelGfx103 = CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS; -constexpr unsigned int MaxCpgPerfcountSelGfx101 = CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxCpgPerfcountSelGfx11 = 
CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11; -#endif - -typedef enum CP_PERFMON_ENABLE_MODE { - CP_PERFMON_ENABLE_MODE_ALWAYS_COUNT = 0x00000000, - CP_PERFMON_ENABLE_MODE_RESERVED_1 = 0x00000001, - CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_TRUE = 0x00000002, - CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_FALSE = 0x00000003, -} CP_PERFMON_ENABLE_MODE; - -typedef enum CP_PERFMON_STATE { - CP_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, - CP_PERFMON_STATE_START_COUNTING = 0x00000001, - CP_PERFMON_STATE_STOP_COUNTING = 0x00000002, - CP_PERFMON_STATE_RESERVED_3 = 0x00000003, - CP_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, - CP_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} CP_PERFMON_STATE; - -typedef enum DbMemArbWatermarks { - TRANSFERRED_64_BYTES = 0x00000000, - TRANSFERRED_128_BYTES = 0x00000001, - TRANSFERRED_256_BYTES = 0x00000002, - TRANSFERRED_512_BYTES = 0x00000003, - TRANSFERRED_1024_BYTES = 0x00000004, - TRANSFERRED_2048_BYTES = 0x00000005, - TRANSFERRED_4096_BYTES = 0x00000006, - TRANSFERRED_8192_BYTES = 0x00000007, -} DbMemArbWatermarks; - -typedef enum DbPRTFaultBehavior { - FAULT_ZERO = 0x00000000, - FAULT_ONE = 0x00000001, - FAULT_FAIL = 0x00000002, - FAULT_PASS = 0x00000003, -} DbPRTFaultBehavior; - -typedef enum DbPSLControl { - PSLC_AUTO = 0x00000000, - PSLC_ON_HANG_ONLY = 0x00000001, - PSLC_ASAP = 0x00000002, - PSLC_COUNTDOWN = 0x00000003, -} DbPSLControl; - -typedef enum DepthArray { - ARRAY_2D_ALT_DEPTH = 0x00000000, - ARRAY_2D_DEPTH = 0x00000001, -} DepthArray; - -typedef enum DepthFormat { - DEPTH_INVALID = 0x00000000, - DEPTH_16 = 0x00000001, - DEPTH_X8_24 = 0x00000002, - DEPTH_8_24 = 0x00000003, - DEPTH_X8_24_FLOAT = 0x00000004, - DEPTH_8_24_FLOAT = 0x00000005, - DEPTH_32_FLOAT = 0x00000006, - DEPTH_X24_8_32_FLOAT = 0x00000007, -} DepthFormat; - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI24 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -typedef enum DF_MALL_PERF_SEL { -#if CHIP_HDR_NAVI21 - 
DF_MALL_PERF_SEL_ML_MTQ_OCC__NV21 = 0x00000000, - DF_MALL_PERF_SEL_ML_MRS_OCC__NV21 = 0x00000001, - DF_MALL_PERF_SEL_ML_REQ__NV21 = 0x00000002, - DF_MALL_PERF_SEL_ML_OPTYPES__NV21 = 0x00000003, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP0__NV21 = 0x00000004, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP1__NV21 = 0x00000005, - DF_MALL_PERF_SEL_ML_CACHE_TAG_PICK_STALL__NV21 = 0x00000006, - DF_MALL_PERF_SEL_ML_UMC_PICK_STALL__NV21 = 0x00000007, - DF_MALL_PERF_SEL_ML_MISC0__NV21 = 0x00000008, - DF_MALL_PERF_SEL_ML_MISC1__NV21 = 0x00000009, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_TRANS_CNT__NV21 = 0x00000030, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_CYCLE_CNT__NV21 = 0x00000031, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT50__NV21 = 0x00000032, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT100__NV21 = 0x00000033, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT150__NV21 = 0x00000034, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT200__NV21 = 0x00000035, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT500__NV21 = 0x00000036, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV21 = 0x00000037, -#endif -#if CHIP_HDR_NAVI22 - DF_MALL_PERF_SEL_ML_MTQ_OCC__NV22 = 0x00000000, - DF_MALL_PERF_SEL_ML_MRS_OCC__NV22 = 0x00000001, - DF_MALL_PERF_SEL_ML_REQ__NV22 = 0x00000002, - DF_MALL_PERF_SEL_ML_OPTYPES__NV22 = 0x00000003, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP0__NV22 = 0x00000004, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP1__NV22 = 0x00000005, - DF_MALL_PERF_SEL_ML_CACHE_TAG_PICK_STALL__NV22 = 0x00000006, - DF_MALL_PERF_SEL_ML_UMC_PICK_STALL__NV22 = 0x00000007, - DF_MALL_PERF_SEL_ML_MISC0__NV22 = 0x00000008, - DF_MALL_PERF_SEL_ML_MISC1__NV22 = 0x00000009, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_TRANS_CNT__NV22 = 0x00000030, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_CYCLE_CNT__NV22 = 0x00000031, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT50__NV22 = 0x00000032, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT100__NV22 = 0x00000033, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT150__NV22 = 0x00000034, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT200__NV22 = 0x00000035, - 
DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT500__NV22 = 0x00000036, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV22 = 0x00000037, -#endif -#if CHIP_HDR_NAVI23 - DF_MALL_PERF_SEL_ML_MTQ_OCC__NV23 = 0x00000000, - DF_MALL_PERF_SEL_ML_MRS_OCC__NV23 = 0x00000001, - DF_MALL_PERF_SEL_ML_REQ__NV23 = 0x00000002, - DF_MALL_PERF_SEL_ML_OPTYPES__NV23 = 0x00000003, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP0__NV23 = 0x00000004, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP1__NV23 = 0x00000005, - DF_MALL_PERF_SEL_ML_CACHE_TAG_PICK_STALL__NV23 = 0x00000006, - DF_MALL_PERF_SEL_ML_UMC_PICK_STALL__NV23 = 0x00000007, - DF_MALL_PERF_SEL_ML_MISC0__NV23 = 0x00000008, - DF_MALL_PERF_SEL_ML_MISC1__NV23 = 0x00000009, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_TRANS_CNT__NV23 = 0x00000030, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_CYCLE_CNT__NV23 = 0x00000031, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT50__NV23 = 0x00000032, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT100__NV23 = 0x00000033, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT150__NV23 = 0x00000034, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT200__NV23 = 0x00000035, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT500__NV23 = 0x00000036, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV23 = 0x00000037, -#endif -#if CHIP_HDR_NAVI24 - DF_MALL_PERF_SEL_ML_MTQ_OCC__NV24 = 0x00000000, - DF_MALL_PERF_SEL_ML_MRS_OCC__NV24 = 0x00000001, - DF_MALL_PERF_SEL_ML_REQ__NV24 = 0x00000002, - DF_MALL_PERF_SEL_ML_OPTYPES__NV24 = 0x00000003, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP0__NV24 = 0x00000004, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP1__NV24 = 0x00000005, - DF_MALL_PERF_SEL_ML_CACHE_TAG_PICK_STALL__NV24 = 0x00000006, - DF_MALL_PERF_SEL_ML_UMC_PICK_STALL__NV24 = 0x00000007, - DF_MALL_PERF_SEL_ML_MISC0__NV24 = 0x00000008, - DF_MALL_PERF_SEL_ML_MISC1__NV24 = 0x00000009, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_TRANS_CNT__NV24 = 0x00000030, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_CYCLE_CNT__NV24 = 0x00000031, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT50__NV24 = 0x00000032, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT100__NV24 = 
0x00000033, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT150__NV24 = 0x00000034, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT200__NV24 = 0x00000035, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT500__NV24 = 0x00000036, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV24 = 0x00000037, -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - DF_MALL_PERF_SEL_ML_MTQ_OCC__NV3X = 0x00000000, - DF_MALL_PERF_SEL_ML_MRS_OCC__NV3X = 0x00000001, - DF_MALL_PERF_SEL_ML_REQ__NV3X = 0x00000002, - DF_MALL_PERF_SEL_ML_OPTYPES__NV3X = 0x00000003, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP0__NV3X = 0x00000004, - DF_MALL_PERF_SEL_ML_CACHE_LKUP_RSP1__NV3X = 0x00000005, - DF_MALL_PERF_SEL_ML_CACHE_TAG_PICK_STALL__NV3X = 0x00000006, - DF_MALL_PERF_SEL_ML_UMC_PICK_STALL__NV3X = 0x00000007, - DF_MALL_PERF_SEL_ML_MISC0__NV3X = 0x00000008, - DF_MALL_PERF_SEL_ML_MISC1__NV3X = 0x00000009, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_TRANS_CNT__NV3X = 0x00000030, - DF_MALL_PERF_SEL_MALL_SDP_AVG_LAT_CYCLE_CNT__NV3X = 0x00000031, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT50__NV3X = 0x00000032, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT100__NV3X = 0x00000033, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT150__NV3X = 0x00000034, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT200__NV3X = 0x00000035, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT500__NV3X = 0x00000036, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV3X = 0x00000037, -#endif -} DF_MALL_PERF_SEL; - -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -constexpr unsigned int MaxDfMallPerfSelNv3x = DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV3X; -#endif -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxDfMallPerfSelNv24 = DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxDfMallPerfSelNv23 = DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV23; -#endif -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxDfMallPerfSelNv22 = DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV22; -#endif -#if CHIP_HDR_NAVI21 -constexpr unsigned int 
MaxDfMallPerfSelNv21 = DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV21; -#endif -#endif - -typedef enum ForceControl { - FORCE_OFF = 0x00000000, - FORCE_ENABLE = 0x00000001, - FORCE_DISABLE = 0x00000002, - FORCE_RESERVED = 0x00000003, -} ForceControl; - -typedef enum GCRPerfSel { - GCR_PERF_SEL_NONE = 0x00000000, -#if CHIP_HDR_PHOENIX1 - GCR_PERF_SEL_RLC_ALL_REQ__APU11 = 0x00000011, - GCR_PERF_SEL_RLC_GL2_RANGE_REQ__APU11 = 0x00000012, - GCR_PERF_SEL_RLC_GL2_RANGE_LT16K_REQ__APU11 = 0x00000013, - GCR_PERF_SEL_RLC_GL2_RANGE_16K_REQ__APU11 = 0x00000014, - GCR_PERF_SEL_RLC_GL2_RANGE_GT16K_REQ__APU11 = 0x00000015, - GCR_PERF_SEL_RLC_GL2_ALL_REQ__APU11 = 0x00000016, - GCR_PERF_SEL_RLC_GL1_RANGE_REQ__APU11 = 0x00000017, - GCR_PERF_SEL_RLC_GL1_RANGE_LT16K_REQ__APU11 = 0x00000018, - GCR_PERF_SEL_RLC_GL1_RANGE_16K_REQ__APU11 = 0x00000019, - GCR_PERF_SEL_RLC_GL1_RANGE_GT16K_REQ__APU11 = 0x0000001a, - GCR_PERF_SEL_RLC_GL1_ALL_REQ__APU11 = 0x0000001b, - GCR_PERF_SEL_RLC_METADATA_REQ__APU11 = 0x0000001c, - GCR_PERF_SEL_RLC_SQC_DATA_REQ__APU11 = 0x0000001d, - GCR_PERF_SEL_RLC_SQC_INST_REQ__APU11 = 0x0000001e, - GCR_PERF_SEL_RLC_TCP_REQ__APU11 = 0x0000001f, - GCR_PERF_SEL_RLC_GL1_TLB_SHOOTDOWN_REQ__APU11 = 0x00000020, - GCR_PERF_SEL_PM_ALL_REQ__APU11 = 0x0000005e, - GCR_PERF_SEL_PM_GL2_RANGE_REQ__APU11 = 0x0000005f, - GCR_PERF_SEL_PM_GL2_RANGE_LT16K_REQ__APU11 = 0x00000060, - GCR_PERF_SEL_PM_GL2_RANGE_16K_REQ__APU11 = 0x00000061, - GCR_PERF_SEL_PM_GL2_RANGE_GT16K_REQ__APU11 = 0x00000062, - GCR_PERF_SEL_PM_GL2_ALL_REQ__APU11 = 0x00000063, - GCR_PERF_SEL_PM_GL1_RANGE_REQ__APU11 = 0x00000064, - GCR_PERF_SEL_PM_GL1_RANGE_LT16K_REQ__APU11 = 0x00000065, - GCR_PERF_SEL_PM_GL1_RANGE_16K_REQ__APU11 = 0x00000066, - GCR_PERF_SEL_PM_GL1_RANGE_GT16K_REQ__APU11 = 0x00000067, - GCR_PERF_SEL_PM_GL1_ALL_REQ__APU11 = 0x00000068, - GCR_PERF_SEL_PM_METADATA_REQ__APU11 = 0x00000069, - GCR_PERF_SEL_PM_SQC_DATA_REQ__APU11 = 0x0000006a, - GCR_PERF_SEL_PM_SQC_INST_REQ__APU11 = 0x0000006b, - 
GCR_PERF_SEL_PM_TCP_REQ__APU11 = 0x0000006c, - GCR_PERF_SEL_PM_GL1_TLB_SHOOTDOWN_REQ__APU11 = 0x0000006d, - GCR_PERF_SEL_PIO_ALL_REQ__APU11 = 0x0000006e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__APU11 = 0x0000006f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__APU11 = 0x00000070, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__APU11 = 0x00000071, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__APU11 = 0x00000072, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__APU11 = 0x00000073, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__APU11 = 0x00000074, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__APU11 = 0x00000075, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__APU11 = 0x00000076, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__APU11 = 0x00000077, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__APU11 = 0x00000078, - GCR_PERF_SEL_PIO_METADATA_REQ__APU11 = 0x00000079, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__APU11 = 0x0000007a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__APU11 = 0x0000007b, - GCR_PERF_SEL_PIO_TCP_REQ__APU11 = 0x0000007c, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__APU11 = 0x0000007d, -#endif - GCR_PERF_SEL_CPG_ALL_REQ__GFX101 = 0x00000021, - GCR_PERF_SEL_CPG_GL2_RANGE_REQ__GFX101 = 0x00000022, - GCR_PERF_SEL_CPG_GL2_RANGE_LT16K_REQ__GFX101 = 0x00000023, - GCR_PERF_SEL_CPG_GL2_RANGE_16K_REQ__GFX101 = 0x00000024, - GCR_PERF_SEL_CPG_GL2_RANGE_GT16K_REQ__GFX101 = 0x00000025, - GCR_PERF_SEL_CPG_GL2_ALL_REQ__GFX101 = 0x00000026, - GCR_PERF_SEL_CPG_GL1_RANGE_REQ__GFX101 = 0x00000027, - GCR_PERF_SEL_CPG_GL1_RANGE_LT16K_REQ__GFX101 = 0x00000028, - GCR_PERF_SEL_CPG_GL1_RANGE_16K_REQ__GFX101 = 0x00000029, - GCR_PERF_SEL_CPG_GL1_RANGE_GT16K_REQ__GFX101 = 0x0000002a, - GCR_PERF_SEL_CPG_GL1_ALL_REQ__GFX101 = 0x0000002b, - GCR_PERF_SEL_CPG_METADATA_REQ__GFX101 = 0x0000002c, - GCR_PERF_SEL_CPG_SQC_DATA_REQ__GFX101 = 0x0000002d, - GCR_PERF_SEL_CPG_SQC_INST_REQ__GFX101 = 0x0000002e, - GCR_PERF_SEL_CPG_TCP_REQ__GFX101 = 0x0000002f, - GCR_PERF_SEL_CPG_TCP_TLB_SHOOTDOWN_REQ__GFX101 = 0x00000030, - GCR_PERF_SEL_CPC_ALL_REQ__GFX101 = 0x00000031, - GCR_PERF_SEL_CPC_GL2_RANGE_REQ__GFX101 = 
0x00000032, - GCR_PERF_SEL_CPC_GL2_RANGE_LT16K_REQ__GFX101 = 0x00000033, - GCR_PERF_SEL_CPC_GL2_RANGE_16K_REQ__GFX101 = 0x00000034, - GCR_PERF_SEL_CPC_GL2_RANGE_GT16K_REQ__GFX101 = 0x00000035, - GCR_PERF_SEL_CPC_GL2_ALL_REQ__GFX101 = 0x00000036, - GCR_PERF_SEL_CPC_GL1_RANGE_REQ__GFX101 = 0x00000037, - GCR_PERF_SEL_CPC_GL1_RANGE_LT16K_REQ__GFX101 = 0x00000038, - GCR_PERF_SEL_CPC_GL1_RANGE_16K_REQ__GFX101 = 0x00000039, - GCR_PERF_SEL_CPC_GL1_RANGE_GT16K_REQ__GFX101 = 0x0000003a, - GCR_PERF_SEL_CPC_GL1_ALL_REQ__GFX101 = 0x0000003b, - GCR_PERF_SEL_CPC_METADATA_REQ__GFX101 = 0x0000003c, - GCR_PERF_SEL_CPC_SQC_DATA_REQ__GFX101 = 0x0000003d, - GCR_PERF_SEL_CPC_SQC_INST_REQ__GFX101 = 0x0000003e, - GCR_PERF_SEL_CPC_TCP_REQ__GFX101 = 0x0000003f, - GCR_PERF_SEL_CPC_TCP_TLB_SHOOTDOWN_REQ__GFX101 = 0x00000040, - GCR_PERF_SEL_CPC_TCP_TLB_SHOOTDOWN_REQ__GFX103 = 0x00000030, - GCR_PERF_SEL_CPG_TCP_TLB_SHOOTDOWN_REQ__GFX103 = 0x00000040, - GCR_PERF_SEL_CPC_ALL_REQ__GFX103COREPLUS = 0x00000021, - GCR_PERF_SEL_CPC_GL2_RANGE_REQ__GFX103COREPLUS = 0x00000022, - GCR_PERF_SEL_CPC_GL2_RANGE_LT16K_REQ__GFX103COREPLUS = 0x00000023, - GCR_PERF_SEL_CPC_GL2_RANGE_16K_REQ__GFX103COREPLUS = 0x00000024, - GCR_PERF_SEL_CPC_GL2_RANGE_GT16K_REQ__GFX103COREPLUS = 0x00000025, - GCR_PERF_SEL_CPC_GL2_ALL_REQ__GFX103COREPLUS = 0x00000026, - GCR_PERF_SEL_CPC_GL1_RANGE_REQ__GFX103COREPLUS = 0x00000027, - GCR_PERF_SEL_CPC_GL1_RANGE_LT16K_REQ__GFX103COREPLUS = 0x00000028, - GCR_PERF_SEL_CPC_GL1_RANGE_16K_REQ__GFX103COREPLUS = 0x00000029, - GCR_PERF_SEL_CPC_GL1_RANGE_GT16K_REQ__GFX103COREPLUS = 0x0000002a, - GCR_PERF_SEL_CPC_GL1_ALL_REQ__GFX103COREPLUS = 0x0000002b, - GCR_PERF_SEL_CPC_METADATA_REQ__GFX103COREPLUS = 0x0000002c, - GCR_PERF_SEL_CPC_SQC_DATA_REQ__GFX103COREPLUS = 0x0000002d, - GCR_PERF_SEL_CPC_SQC_INST_REQ__GFX103COREPLUS = 0x0000002e, - GCR_PERF_SEL_CPC_TCP_REQ__GFX103COREPLUS = 0x0000002f, - GCR_PERF_SEL_CPG_ALL_REQ__GFX103COREPLUS = 0x00000031, - GCR_PERF_SEL_CPG_GL2_RANGE_REQ__GFX103COREPLUS 
= 0x00000032, - GCR_PERF_SEL_CPG_GL2_RANGE_LT16K_REQ__GFX103COREPLUS = 0x00000033, - GCR_PERF_SEL_CPG_GL2_RANGE_16K_REQ__GFX103COREPLUS = 0x00000034, - GCR_PERF_SEL_CPG_GL2_RANGE_GT16K_REQ__GFX103COREPLUS = 0x00000035, - GCR_PERF_SEL_CPG_GL2_ALL_REQ__GFX103COREPLUS = 0x00000036, - GCR_PERF_SEL_CPG_GL1_RANGE_REQ__GFX103COREPLUS = 0x00000037, - GCR_PERF_SEL_CPG_GL1_RANGE_LT16K_REQ__GFX103COREPLUS = 0x00000038, - GCR_PERF_SEL_CPG_GL1_RANGE_16K_REQ__GFX103COREPLUS = 0x00000039, - GCR_PERF_SEL_CPG_GL1_RANGE_GT16K_REQ__GFX103COREPLUS = 0x0000003a, - GCR_PERF_SEL_CPG_GL1_ALL_REQ__GFX103COREPLUS = 0x0000003b, - GCR_PERF_SEL_CPG_METADATA_REQ__GFX103COREPLUS = 0x0000003c, - GCR_PERF_SEL_CPG_SQC_DATA_REQ__GFX103COREPLUS = 0x0000003d, - GCR_PERF_SEL_CPG_SQC_INST_REQ__GFX103COREPLUS = 0x0000003e, - GCR_PERF_SEL_CPG_TCP_REQ__GFX103COREPLUS = 0x0000003f, - GCR_PERF_SEL_SDMA0_TCP_TLB_SHOOTDOWN_REQ__GFX10CORE = 0x00000010, - GCR_PERF_SEL_CPF_TCP_TLB_SHOOTDOWN_REQ__GFX10CORE = 0x00000050, - GCR_PERF_SEL_SDMA0_ALL_REQ__GFX10COREPLUS = 0x00000001, - GCR_PERF_SEL_SDMA0_GL2_RANGE_REQ__GFX10COREPLUS = 0x00000002, - GCR_PERF_SEL_SDMA0_GL2_RANGE_LT16K_REQ__GFX10COREPLUS = 0x00000003, - GCR_PERF_SEL_SDMA0_GL2_RANGE_16K_REQ__GFX10COREPLUS = 0x00000004, - GCR_PERF_SEL_SDMA0_GL2_RANGE_GT16K_REQ__GFX10COREPLUS = 0x00000005, - GCR_PERF_SEL_SDMA0_GL2_ALL_REQ__GFX10COREPLUS = 0x00000006, - GCR_PERF_SEL_SDMA0_GL1_RANGE_REQ__GFX10COREPLUS = 0x00000007, - GCR_PERF_SEL_SDMA0_GL1_RANGE_LT16K_REQ__GFX10COREPLUS = 0x00000008, - GCR_PERF_SEL_SDMA0_GL1_RANGE_16K_REQ__GFX10COREPLUS = 0x00000009, - GCR_PERF_SEL_SDMA0_GL1_RANGE_GT16K_REQ__GFX10COREPLUS = 0x0000000a, - GCR_PERF_SEL_SDMA0_GL1_ALL_REQ__GFX10COREPLUS = 0x0000000b, - GCR_PERF_SEL_SDMA0_METADATA_REQ__GFX10COREPLUS = 0x0000000c, - GCR_PERF_SEL_SDMA0_SQC_DATA_REQ__GFX10COREPLUS = 0x0000000d, - GCR_PERF_SEL_SDMA0_SQC_INST_REQ__GFX10COREPLUS = 0x0000000e, - GCR_PERF_SEL_SDMA0_TCP_REQ__GFX10COREPLUS = 0x0000000f, - 
GCR_PERF_SEL_CPF_ALL_REQ__GFX10COREPLUS = 0x00000041, - GCR_PERF_SEL_CPF_GL2_RANGE_REQ__GFX10COREPLUS = 0x00000042, - GCR_PERF_SEL_CPF_GL2_RANGE_LT16K_REQ__GFX10COREPLUS = 0x00000043, - GCR_PERF_SEL_CPF_GL2_RANGE_16K_REQ__GFX10COREPLUS = 0x00000044, - GCR_PERF_SEL_CPF_GL2_RANGE_GT16K_REQ__GFX10COREPLUS = 0x00000045, - GCR_PERF_SEL_CPF_GL2_ALL_REQ__GFX10COREPLUS = 0x00000046, - GCR_PERF_SEL_CPF_GL1_RANGE_REQ__GFX10COREPLUS = 0x00000047, - GCR_PERF_SEL_CPF_GL1_RANGE_LT16K_REQ__GFX10COREPLUS = 0x00000048, - GCR_PERF_SEL_CPF_GL1_RANGE_16K_REQ__GFX10COREPLUS = 0x00000049, - GCR_PERF_SEL_CPF_GL1_RANGE_GT16K_REQ__GFX10COREPLUS = 0x0000004a, - GCR_PERF_SEL_CPF_GL1_ALL_REQ__GFX10COREPLUS = 0x0000004b, - GCR_PERF_SEL_CPF_METADATA_REQ__GFX10COREPLUS = 0x0000004c, - GCR_PERF_SEL_CPF_SQC_DATA_REQ__GFX10COREPLUS = 0x0000004d, - GCR_PERF_SEL_CPF_SQC_INST_REQ__GFX10COREPLUS = 0x0000004e, - GCR_PERF_SEL_CPF_TCP_REQ__GFX10COREPLUS = 0x0000004f, - GCR_PERF_SEL_VIRT_REQ__GFX10COREPLUS = 0x00000051, - GCR_PERF_SEL_PHY_REQ__GFX10COREPLUS = 0x00000052, - GCR_PERF_SEL_TLB_SHOOTDOWN_HEAVY_REQ__GFX10COREPLUS = 0x00000053, - GCR_PERF_SEL_TLB_SHOOTDOWN_LIGHT_REQ__GFX10COREPLUS = 0x00000054, - GCR_PERF_SEL_ALL_REQ__GFX10COREPLUS = 0x00000055, - GCR_PERF_SEL_CLK_FOR_PHY_OUTSTANDING_REQ__GFX10COREPLUS = 0x00000056, - GCR_PERF_SEL_CLK_FOR_VIRT_OUTSTANDING_REQ__GFX10COREPLUS = 0x00000057, - GCR_PERF_SEL_CLK_FOR_ALL_OUTSTANDING_REQ__GFX10COREPLUS = 0x00000058, - GCR_PERF_SEL_UTCL2_REQ__GFX10COREPLUS = 0x00000059, - GCR_PERF_SEL_UTCL2_RET__GFX10COREPLUS = 0x0000005a, - GCR_PERF_SEL_UTCL2_OUT_OF_CREDIT_EVENT__GFX10COREPLUS = 0x0000005b, - GCR_PERF_SEL_UTCL2_INFLIGHT_REQ__GFX10COREPLUS = 0x0000005c, - GCR_PERF_SEL_UTCL2_FILTERED_RET__GFX10COREPLUS = 0x0000005d, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GCR_PERF_SEL_SDMA0_GL1_TLB_SHOOTDOWN_REQ__GFX11 = 0x00000010, - GCR_PERF_SEL_CPC_GL1_TLB_SHOOTDOWN_REQ__GFX11 = 0x00000030, - 
GCR_PERF_SEL_CPG_GL1_TLB_SHOOTDOWN_REQ__GFX11 = 0x00000040, - GCR_PERF_SEL_CPF_GL1_TLB_SHOOTDOWN_REQ__GFX11 = 0x00000050, -#endif -#if CHIP_HDR_NAVI21 - GCR_PERF_SEL_SDMA1_ALL_REQ__NV21 = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__NV21 = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__NV21 = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__NV21 = 0x00000014, - GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__NV21 = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__NV21 = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__NV21 = 0x00000017, - GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__NV21 = 0x00000018, - GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__NV21 = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__NV21 = 0x0000001a, - GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__NV21 = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__NV21 = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__NV21 = 0x0000001d, - GCR_PERF_SEL_SDMA1_SQC_INST_REQ__NV21 = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__NV21 = 0x0000001f, - GCR_PERF_SEL_SDMA1_TCP_TLB_SHOOTDOWN_REQ__NV21 = 0x00000020, - GCR_PERF_SEL_SDMA2_ALL_REQ__NV21 = 0x0000005e, - GCR_PERF_SEL_SDMA2_GL2_RANGE_REQ__NV21 = 0x0000005f, - GCR_PERF_SEL_SDMA2_GL2_RANGE_LT16K_REQ__NV21 = 0x00000060, - GCR_PERF_SEL_SDMA2_GL2_RANGE_16K_REQ__NV21 = 0x00000061, - GCR_PERF_SEL_SDMA2_GL2_RANGE_GT16K_REQ__NV21 = 0x00000062, - GCR_PERF_SEL_SDMA2_GL2_ALL_REQ__NV21 = 0x00000063, - GCR_PERF_SEL_SDMA2_GL1_RANGE_REQ__NV21 = 0x00000064, - GCR_PERF_SEL_SDMA2_GL1_RANGE_LT16K_REQ__NV21 = 0x00000065, - GCR_PERF_SEL_SDMA2_GL1_RANGE_16K_REQ__NV21 = 0x00000066, - GCR_PERF_SEL_SDMA2_GL1_RANGE_GT16K_REQ__NV21 = 0x00000067, - GCR_PERF_SEL_SDMA2_GL1_ALL_REQ__NV21 = 0x00000068, - GCR_PERF_SEL_SDMA2_METADATA_REQ__NV21 = 0x00000069, - GCR_PERF_SEL_SDMA2_SQC_DATA_REQ__NV21 = 0x0000006a, - GCR_PERF_SEL_SDMA2_SQC_INST_REQ__NV21 = 0x0000006b, - GCR_PERF_SEL_SDMA2_TCP_REQ__NV21 = 0x0000006c, - GCR_PERF_SEL_SDMA2_TCP_TLB_SHOOTDOWN_REQ__NV21 = 0x0000006d, - GCR_PERF_SEL_SDMA3_ALL_REQ__NV21 = 
0x0000006e, - GCR_PERF_SEL_SDMA3_GL2_RANGE_REQ__NV21 = 0x0000006f, - GCR_PERF_SEL_SDMA3_GL2_RANGE_LT16K_REQ__NV21 = 0x00000070, - GCR_PERF_SEL_SDMA3_GL2_RANGE_16K_REQ__NV21 = 0x00000071, - GCR_PERF_SEL_SDMA3_GL2_RANGE_GT16K_REQ__NV21 = 0x00000072, - GCR_PERF_SEL_SDMA3_GL2_ALL_REQ__NV21 = 0x00000073, - GCR_PERF_SEL_SDMA3_GL1_RANGE_REQ__NV21 = 0x00000074, - GCR_PERF_SEL_SDMA3_GL1_RANGE_LT16K_REQ__NV21 = 0x00000075, - GCR_PERF_SEL_SDMA3_GL1_RANGE_16K_REQ__NV21 = 0x00000076, - GCR_PERF_SEL_SDMA3_GL1_RANGE_GT16K_REQ__NV21 = 0x00000077, - GCR_PERF_SEL_SDMA3_GL1_ALL_REQ__NV21 = 0x00000078, - GCR_PERF_SEL_SDMA3_METADATA_REQ__NV21 = 0x00000079, - GCR_PERF_SEL_SDMA3_SQC_DATA_REQ__NV21 = 0x0000007a, - GCR_PERF_SEL_SDMA3_SQC_INST_REQ__NV21 = 0x0000007b, - GCR_PERF_SEL_SDMA3_TCP_REQ__NV21 = 0x0000007c, - GCR_PERF_SEL_SDMA3_TCP_TLB_SHOOTDOWN_REQ__NV21 = 0x0000007d, - GCR_PERF_SEL_PIO_ALL_REQ__NV21 = 0x0000007e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__NV21 = 0x0000007f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__NV21 = 0x00000080, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__NV21 = 0x00000081, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__NV21 = 0x00000082, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__NV21 = 0x00000083, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__NV21 = 0x00000084, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__NV21 = 0x00000085, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__NV21 = 0x00000086, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__NV21 = 0x00000087, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__NV21 = 0x00000088, - GCR_PERF_SEL_PIO_METADATA_REQ__NV21 = 0x00000089, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__NV21 = 0x0000008a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__NV21 = 0x0000008b, - GCR_PERF_SEL_PIO_TCP_REQ__NV21 = 0x0000008c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV21 = 0x0000008d, -#endif -#if CHIP_HDR_NAVI22 - GCR_PERF_SEL_SDMA1_ALL_REQ__NV22 = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__NV22 = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__NV22 = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__NV22 = 0x00000014, - 
GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__NV22 = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__NV22 = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__NV22 = 0x00000017, - GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__NV22 = 0x00000018, - GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__NV22 = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__NV22 = 0x0000001a, - GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__NV22 = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__NV22 = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__NV22 = 0x0000001d, - GCR_PERF_SEL_SDMA1_SQC_INST_REQ__NV22 = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__NV22 = 0x0000001f, - GCR_PERF_SEL_SDMA1_TCP_TLB_SHOOTDOWN_REQ__NV22 = 0x00000020, - GCR_PERF_SEL_PIO_ALL_REQ__NV22 = 0x0000005e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__NV22 = 0x0000005f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__NV22 = 0x00000060, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__NV22 = 0x00000061, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__NV22 = 0x00000062, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__NV22 = 0x00000063, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__NV22 = 0x00000064, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__NV22 = 0x00000065, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__NV22 = 0x00000066, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__NV22 = 0x00000067, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__NV22 = 0x00000068, - GCR_PERF_SEL_PIO_METADATA_REQ__NV22 = 0x00000069, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__NV22 = 0x0000006a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__NV22 = 0x0000006b, - GCR_PERF_SEL_PIO_TCP_REQ__NV22 = 0x0000006c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV22 = 0x0000006d, -#endif -#if CHIP_HDR_NAVI23 - GCR_PERF_SEL_SDMA1_ALL_REQ__NV23 = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__NV23 = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__NV23 = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__NV23 = 0x00000014, - GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__NV23 = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__NV23 = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__NV23 = 0x00000017, - 
GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__NV23 = 0x00000018, - GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__NV23 = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__NV23 = 0x0000001a, - GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__NV23 = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__NV23 = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__NV23 = 0x0000001d, - GCR_PERF_SEL_SDMA1_SQC_INST_REQ__NV23 = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__NV23 = 0x0000001f, - GCR_PERF_SEL_SDMA1_TCP_TLB_SHOOTDOWN_REQ__NV23 = 0x00000020, - GCR_PERF_SEL_PIO_ALL_REQ__NV23 = 0x0000005e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__NV23 = 0x0000005f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__NV23 = 0x00000060, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__NV23 = 0x00000061, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__NV23 = 0x00000062, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__NV23 = 0x00000063, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__NV23 = 0x00000064, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__NV23 = 0x00000065, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__NV23 = 0x00000066, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__NV23 = 0x00000067, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__NV23 = 0x00000068, - GCR_PERF_SEL_PIO_METADATA_REQ__NV23 = 0x00000069, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__NV23 = 0x0000006a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__NV23 = 0x0000006b, - GCR_PERF_SEL_PIO_TCP_REQ__NV23 = 0x0000006c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV23 = 0x0000006d, -#endif -#if CHIP_HDR_NAVI24 - GCR_PERF_SEL_SDMA1_ALL_REQ__NV24 = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__NV24 = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__NV24 = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__NV24 = 0x00000014, - GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__NV24 = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__NV24 = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__NV24 = 0x00000017, - GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__NV24 = 0x00000018, - GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__NV24 = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__NV24 = 0x0000001a, - 
GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__NV24 = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__NV24 = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__NV24 = 0x0000001d, - GCR_PERF_SEL_SDMA1_SQC_INST_REQ__NV24 = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__NV24 = 0x0000001f, - GCR_PERF_SEL_SDMA1_TCP_TLB_SHOOTDOWN_REQ__NV24 = 0x00000020, - GCR_PERF_SEL_PIO_ALL_REQ__NV24 = 0x0000005e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__NV24 = 0x0000005f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__NV24 = 0x00000060, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__NV24 = 0x00000061, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__NV24 = 0x00000062, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__NV24 = 0x00000063, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__NV24 = 0x00000064, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__NV24 = 0x00000065, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__NV24 = 0x00000066, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__NV24 = 0x00000067, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__NV24 = 0x00000068, - GCR_PERF_SEL_PIO_METADATA_REQ__NV24 = 0x00000069, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__NV24 = 0x0000006a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__NV24 = 0x0000006b, - GCR_PERF_SEL_PIO_TCP_REQ__NV24 = 0x0000006c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV24 = 0x0000006d, -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - GCR_PERF_SEL_SDMA1_ALL_REQ__NV3X = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__NV3X = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__NV3X = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__NV3X = 0x00000014, - GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__NV3X = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__NV3X = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__NV3X = 0x00000017, - GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__NV3X = 0x00000018, - GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__NV3X = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__NV3X = 0x0000001a, - GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__NV3X = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__NV3X = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__NV3X = 0x0000001d, - 
GCR_PERF_SEL_SDMA1_SQC_INST_REQ__NV3X = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__NV3X = 0x0000001f, - GCR_PERF_SEL_SDMA1_GL1_TLB_SHOOTDOWN_REQ__NV3X = 0x00000020, - GCR_PERF_SEL_PMM_ABIT_NUM_FLUSH__NV3X = 0x0000005e, - GCR_PERF_SEL_PMM_ABIT_FLUSH_ONGOING__NV3X = 0x0000005f, - GCR_PERF_SEL_PMM_NUM_INTERRUPT__NV3X = 0x00000060, - GCR_PERF_SEL_PMM_STALL_PMM_IH_CREDITS__NV3X = 0x00000061, - GCR_PERF_SEL_PMM_INTERRUPT_READY_TO_SEND__NV3X = 0x00000062, - GCR_PERF_SEL_PMM_ABIT_TIMER_FLUSH__NV3X = 0x00000063, - GCR_PERF_SEL_PMM_ABIT_FORCE_FLUSH__NV3X = 0x00000064, - GCR_PERF_SEL_PMM_ABIT_FLUSH_INTERRUPT__NV3X = 0x00000065, - GCR_PERF_SEL_PMM_ALOG_INTERRUPT__NV3X = 0x00000066, - GCR_PERF_SEL_PMM_MAM_FLUSH_REQ__NV3X = 0x00000067, - GCR_PERF_SEL_PMM_MAM_FLUSH_RESP__NV3X = 0x00000068, - GCR_PERF_SEL_PMM_RLC_CGCG_REQ__NV3X = 0x00000069, - GCR_PERF_SEL_PMM_RLC_CGCG_RESP__NV3X = 0x0000006a, - GCR_PERF_SEL_RLC_ALL_REQ__NV3X = 0x0000006b, - GCR_PERF_SEL_RLC_GL2_RANGE_REQ__NV3X = 0x0000006c, - GCR_PERF_SEL_RLC_GL2_RANGE_LT16K_REQ__NV3X = 0x0000006d, - GCR_PERF_SEL_RLC_GL2_RANGE_16K_REQ__NV3X = 0x0000006e, - GCR_PERF_SEL_RLC_GL2_RANGE_GT16K_REQ__NV3X = 0x0000006f, - GCR_PERF_SEL_RLC_GL2_ALL_REQ__NV3X = 0x00000070, - GCR_PERF_SEL_RLC_GL1_RANGE_REQ__NV3X = 0x00000071, - GCR_PERF_SEL_RLC_GL1_RANGE_LT16K_REQ__NV3X = 0x00000072, - GCR_PERF_SEL_RLC_GL1_RANGE_16K_REQ__NV3X = 0x00000073, - GCR_PERF_SEL_RLC_GL1_RANGE_GT16K_REQ__NV3X = 0x00000074, - GCR_PERF_SEL_RLC_GL1_ALL_REQ__NV3X = 0x00000075, - GCR_PERF_SEL_RLC_METADATA_REQ__NV3X = 0x00000076, - GCR_PERF_SEL_RLC_SQC_DATA_REQ__NV3X = 0x00000077, - GCR_PERF_SEL_RLC_SQC_INST_REQ__NV3X = 0x00000078, - GCR_PERF_SEL_RLC_TCP_REQ__NV3X = 0x00000079, - GCR_PERF_SEL_RLC_GL1_TLB_SHOOTDOWN_REQ__NV3X = 0x0000007a, - GCR_PERF_SEL_PM_ALL_REQ__NV3X = 0x0000007b, - GCR_PERF_SEL_PM_GL2_RANGE_REQ__NV3X = 0x0000007c, - GCR_PERF_SEL_PM_GL2_RANGE_LT16K_REQ__NV3X = 0x0000007d, - GCR_PERF_SEL_PM_GL2_RANGE_16K_REQ__NV3X = 0x0000007e, - 
GCR_PERF_SEL_PM_GL2_RANGE_GT16K_REQ__NV3X = 0x0000007f, - GCR_PERF_SEL_PM_GL2_ALL_REQ__NV3X = 0x00000080, - GCR_PERF_SEL_PM_GL1_RANGE_REQ__NV3X = 0x00000081, - GCR_PERF_SEL_PM_GL1_RANGE_LT16K_REQ__NV3X = 0x00000082, - GCR_PERF_SEL_PM_GL1_RANGE_16K_REQ__NV3X = 0x00000083, - GCR_PERF_SEL_PM_GL1_RANGE_GT16K_REQ__NV3X = 0x00000084, - GCR_PERF_SEL_PM_GL1_ALL_REQ__NV3X = 0x00000085, - GCR_PERF_SEL_PM_METADATA_REQ__NV3X = 0x00000086, - GCR_PERF_SEL_PM_SQC_DATA_REQ__NV3X = 0x00000087, - GCR_PERF_SEL_PM_SQC_INST_REQ__NV3X = 0x00000088, - GCR_PERF_SEL_PM_TCP_REQ__NV3X = 0x00000089, - GCR_PERF_SEL_PM_GL1_TLB_SHOOTDOWN_REQ__NV3X = 0x0000008a, - GCR_PERF_SEL_PIO_ALL_REQ__NV3X = 0x0000008b, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__NV3X = 0x0000008c, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__NV3X = 0x0000008d, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__NV3X = 0x0000008e, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__NV3X = 0x0000008f, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__NV3X = 0x00000090, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__NV3X = 0x00000091, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__NV3X = 0x00000092, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__NV3X = 0x00000093, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__NV3X = 0x00000094, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__NV3X = 0x00000095, - GCR_PERF_SEL_PIO_METADATA_REQ__NV3X = 0x00000096, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__NV3X = 0x00000097, - GCR_PERF_SEL_PIO_SQC_INST_REQ__NV3X = 0x00000098, - GCR_PERF_SEL_PIO_TCP_REQ__NV3X = 0x00000099, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__NV3X = 0x0000009a, -#endif - GCR_PERF_SEL_SDMA1_ALL_REQ__OSS50 = 0x00000011, - GCR_PERF_SEL_SDMA1_GL2_RANGE_REQ__OSS50 = 0x00000012, - GCR_PERF_SEL_SDMA1_GL2_RANGE_LT16K_REQ__OSS50 = 0x00000013, - GCR_PERF_SEL_SDMA1_GL2_RANGE_16K_REQ__OSS50 = 0x00000014, - GCR_PERF_SEL_SDMA1_GL2_RANGE_GT16K_REQ__OSS50 = 0x00000015, - GCR_PERF_SEL_SDMA1_GL2_ALL_REQ__OSS50 = 0x00000016, - GCR_PERF_SEL_SDMA1_GL1_RANGE_REQ__OSS50 = 0x00000017, - GCR_PERF_SEL_SDMA1_GL1_RANGE_LT16K_REQ__OSS50 = 0x00000018, - 
GCR_PERF_SEL_SDMA1_GL1_RANGE_16K_REQ__OSS50 = 0x00000019, - GCR_PERF_SEL_SDMA1_GL1_RANGE_GT16K_REQ__OSS50 = 0x0000001a, - GCR_PERF_SEL_SDMA1_GL1_ALL_REQ__OSS50 = 0x0000001b, - GCR_PERF_SEL_SDMA1_METADATA_REQ__OSS50 = 0x0000001c, - GCR_PERF_SEL_SDMA1_SQC_DATA_REQ__OSS50 = 0x0000001d, - GCR_PERF_SEL_SDMA1_SQC_INST_REQ__OSS50 = 0x0000001e, - GCR_PERF_SEL_SDMA1_TCP_REQ__OSS50 = 0x0000001f, - GCR_PERF_SEL_SDMA1_TCP_TLB_SHOOTDOWN_REQ__OSS50 = 0x00000020, - GCR_PERF_SEL_RLC_ALL_REQ__RAPHAEL = 0x00000011, - GCR_PERF_SEL_RLC_GL2_RANGE_REQ__RAPHAEL = 0x00000012, - GCR_PERF_SEL_RLC_GL2_RANGE_LT16K_REQ__RAPHAEL = 0x00000013, - GCR_PERF_SEL_RLC_GL2_RANGE_16K_REQ__RAPHAEL = 0x00000014, - GCR_PERF_SEL_RLC_GL2_RANGE_GT16K_REQ__RAPHAEL = 0x00000015, - GCR_PERF_SEL_RLC_GL2_ALL_REQ__RAPHAEL = 0x00000016, - GCR_PERF_SEL_RLC_GL1_RANGE_REQ__RAPHAEL = 0x00000017, - GCR_PERF_SEL_RLC_GL1_RANGE_LT16K_REQ__RAPHAEL = 0x00000018, - GCR_PERF_SEL_RLC_GL1_RANGE_16K_REQ__RAPHAEL = 0x00000019, - GCR_PERF_SEL_RLC_GL1_RANGE_GT16K_REQ__RAPHAEL = 0x0000001a, - GCR_PERF_SEL_RLC_GL1_ALL_REQ__RAPHAEL = 0x0000001b, - GCR_PERF_SEL_RLC_METADATA_REQ__RAPHAEL = 0x0000001c, - GCR_PERF_SEL_RLC_SQC_DATA_REQ__RAPHAEL = 0x0000001d, - GCR_PERF_SEL_RLC_SQC_INST_REQ__RAPHAEL = 0x0000001e, - GCR_PERF_SEL_RLC_TCP_REQ__RAPHAEL = 0x0000001f, - GCR_PERF_SEL_RLC_TCP_TLB_SHOOTDOWN_REQ__RAPHAEL = 0x00000020, - GCR_PERF_SEL_PIO_ALL_REQ__RAPHAEL = 0x0000005e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__RAPHAEL = 0x0000005f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__RAPHAEL = 0x00000060, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__RAPHAEL = 0x00000061, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__RAPHAEL = 0x00000062, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__RAPHAEL = 0x00000063, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__RAPHAEL = 0x00000064, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__RAPHAEL = 0x00000065, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__RAPHAEL = 0x00000066, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__RAPHAEL = 0x00000067, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__RAPHAEL = 
0x00000068, - GCR_PERF_SEL_PIO_METADATA_REQ__RAPHAEL = 0x00000069, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__RAPHAEL = 0x0000006a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__RAPHAEL = 0x0000006b, - GCR_PERF_SEL_PIO_TCP_REQ__RAPHAEL = 0x0000006c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__RAPHAEL = 0x0000006d, - GCR_PERF_SEL_RLC_ALL_REQ__REMBRANDT = 0x00000011, - GCR_PERF_SEL_RLC_GL2_RANGE_REQ__REMBRANDT = 0x00000012, - GCR_PERF_SEL_RLC_GL2_RANGE_LT16K_REQ__REMBRANDT = 0x00000013, - GCR_PERF_SEL_RLC_GL2_RANGE_16K_REQ__REMBRANDT = 0x00000014, - GCR_PERF_SEL_RLC_GL2_RANGE_GT16K_REQ__REMBRANDT = 0x00000015, - GCR_PERF_SEL_RLC_GL2_ALL_REQ__REMBRANDT = 0x00000016, - GCR_PERF_SEL_RLC_GL1_RANGE_REQ__REMBRANDT = 0x00000017, - GCR_PERF_SEL_RLC_GL1_RANGE_LT16K_REQ__REMBRANDT = 0x00000018, - GCR_PERF_SEL_RLC_GL1_RANGE_16K_REQ__REMBRANDT = 0x00000019, - GCR_PERF_SEL_RLC_GL1_RANGE_GT16K_REQ__REMBRANDT = 0x0000001a, - GCR_PERF_SEL_RLC_GL1_ALL_REQ__REMBRANDT = 0x0000001b, - GCR_PERF_SEL_RLC_METADATA_REQ__REMBRANDT = 0x0000001c, - GCR_PERF_SEL_RLC_SQC_DATA_REQ__REMBRANDT = 0x0000001d, - GCR_PERF_SEL_RLC_SQC_INST_REQ__REMBRANDT = 0x0000001e, - GCR_PERF_SEL_RLC_TCP_REQ__REMBRANDT = 0x0000001f, - GCR_PERF_SEL_RLC_TCP_TLB_SHOOTDOWN_REQ__REMBRANDT = 0x00000020, - GCR_PERF_SEL_PIO_ALL_REQ__REMBRANDT = 0x0000005e, - GCR_PERF_SEL_PIO_GL2_RANGE_REQ__REMBRANDT = 0x0000005f, - GCR_PERF_SEL_PIO_GL2_RANGE_LT16K_REQ__REMBRANDT = 0x00000060, - GCR_PERF_SEL_PIO_GL2_RANGE_16K_REQ__REMBRANDT = 0x00000061, - GCR_PERF_SEL_PIO_GL2_RANGE_GT16K_REQ__REMBRANDT = 0x00000062, - GCR_PERF_SEL_PIO_GL2_ALL_REQ__REMBRANDT = 0x00000063, - GCR_PERF_SEL_PIO_GL1_RANGE_REQ__REMBRANDT = 0x00000064, - GCR_PERF_SEL_PIO_GL1_RANGE_LT16K_REQ__REMBRANDT = 0x00000065, - GCR_PERF_SEL_PIO_GL1_RANGE_16K_REQ__REMBRANDT = 0x00000066, - GCR_PERF_SEL_PIO_GL1_RANGE_GT16K_REQ__REMBRANDT = 0x00000067, - GCR_PERF_SEL_PIO_GL1_ALL_REQ__REMBRANDT = 0x00000068, - GCR_PERF_SEL_PIO_METADATA_REQ__REMBRANDT = 0x00000069, - GCR_PERF_SEL_PIO_SQC_DATA_REQ__REMBRANDT 
= 0x0000006a, - GCR_PERF_SEL_PIO_SQC_INST_REQ__REMBRANDT = 0x0000006b, - GCR_PERF_SEL_PIO_TCP_REQ__REMBRANDT = 0x0000006c, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__REMBRANDT = 0x0000006d, -} GCRPerfSel; - -constexpr unsigned int MaxGCRPerfSelGfx101 = GCR_PERF_SEL_UTCL2_FILTERED_RET__GFX10COREPLUS; -#if CHIP_HDR_NAVI21 -constexpr unsigned int MaxGCRPerfSelNv21 = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV21; -#endif -constexpr unsigned int MaxGCRPerfSelRaphael = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__RAPHAEL; -constexpr unsigned int MaxGCRPerfSelRembrandt = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__REMBRANDT; -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxGCRPerfSelNv24 = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxGCRPerfSelNv23 = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV23; -#endif -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxGCRPerfSelNv22 = GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV22; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -constexpr unsigned int MaxGCRPerfSelNv3x = GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__NV3X; -#endif -#if CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGCRPerfSelApu11 = GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__APU11; -#endif - -typedef enum GDS_PERFCOUNT_SELECT { - GDS_PERF_SEL_DS_ADDR_CONFL__GFX09 = 0x00000000, - GDS_PERF_SEL_DS_BANK_CONFL__GFX09 = 0x00000001, - GDS_PERF_SEL_WBUF_FLUSH__GFX09 = 0x00000002, - GDS_PERF_SEL_WR_COMP__GFX09 = 0x00000003, - GDS_PERF_SEL_WBUF_WR__GFX09 = 0x00000004, - GDS_PERF_SEL_RBUF_HIT__GFX09 = 0x00000005, - GDS_PERF_SEL_RBUF_MISS__GFX09 = 0x00000006, - GDS_PERF_SEL_SE0_SH0_NORET__GFX09 = 0x00000007, - GDS_PERF_SEL_SE0_SH0_RET__GFX09 = 0x00000008, - GDS_PERF_SEL_SE0_SH0_ORD_CNT__GFX09 = 0x00000009, - GDS_PERF_SEL_SE0_SH0_2COMP_REQ__GFX09 = 0x0000000a, - GDS_PERF_SEL_SE0_SH0_ORD_WAVE_VALID__GFX09 = 0x0000000b, - GDS_PERF_SEL_SE0_SH0_GDS_DATA_VALID__GFX09 = 0x0000000c, - GDS_PERF_SEL_SE0_SH0_GDS_STALL_BY_ORD__GFX09 = 0x0000000d, 
- GDS_PERF_SEL_SE0_SH0_GDS_WR_OP__GFX09 = 0x0000000e, - GDS_PERF_SEL_SE0_SH0_GDS_RD_OP__GFX09 = 0x0000000f, - GDS_PERF_SEL_SE0_SH0_GDS_ATOM_OP__GFX09 = 0x00000010, - GDS_PERF_SEL_SE0_SH0_GDS_REL_OP__GFX09 = 0x00000011, - GDS_PERF_SEL_SE0_SH0_GDS_CMPXCH_OP__GFX09 = 0x00000012, - GDS_PERF_SEL_SE0_SH0_GDS_BYTE_OP__GFX09 = 0x00000013, - GDS_PERF_SEL_SE0_SH0_GDS_SHORT_OP__GFX09 = 0x00000014, - GDS_PERF_SEL_SE0_SH1_NORET__GFX09 = 0x00000015, - GDS_PERF_SEL_SE0_SH1_RET__GFX09 = 0x00000016, - GDS_PERF_SEL_SE0_SH1_ORD_CNT__GFX09 = 0x00000017, - GDS_PERF_SEL_SE0_SH1_2COMP_REQ__GFX09 = 0x00000018, - GDS_PERF_SEL_SE0_SH1_ORD_WAVE_VALID__GFX09 = 0x00000019, - GDS_PERF_SEL_SE0_SH1_GDS_DATA_VALID__GFX09 = 0x0000001a, - GDS_PERF_SEL_SE0_SH1_GDS_STALL_BY_ORD__GFX09 = 0x0000001b, - GDS_PERF_SEL_SE0_SH1_GDS_WR_OP__GFX09 = 0x0000001c, - GDS_PERF_SEL_SE0_SH1_GDS_RD_OP__GFX09 = 0x0000001d, - GDS_PERF_SEL_SE0_SH1_GDS_ATOM_OP__GFX09 = 0x0000001e, - GDS_PERF_SEL_SE0_SH1_GDS_REL_OP__GFX09 = 0x0000001f, - GDS_PERF_SEL_SE0_SH1_GDS_CMPXCH_OP__GFX09 = 0x00000020, - GDS_PERF_SEL_SE0_SH1_GDS_BYTE_OP__GFX09 = 0x00000021, - GDS_PERF_SEL_SE0_SH1_GDS_SHORT_OP__GFX09 = 0x00000022, - GDS_PERF_SEL_SE1_SH0_NORET__GFX09 = 0x00000023, - GDS_PERF_SEL_SE1_SH0_RET__GFX09 = 0x00000024, - GDS_PERF_SEL_SE1_SH0_ORD_CNT__GFX09 = 0x00000025, - GDS_PERF_SEL_SE1_SH0_2COMP_REQ__GFX09 = 0x00000026, - GDS_PERF_SEL_SE1_SH0_ORD_WAVE_VALID__GFX09 = 0x00000027, - GDS_PERF_SEL_SE1_SH0_GDS_DATA_VALID__GFX09 = 0x00000028, - GDS_PERF_SEL_SE1_SH0_GDS_STALL_BY_ORD__GFX09 = 0x00000029, - GDS_PERF_SEL_SE1_SH0_GDS_WR_OP__GFX09 = 0x0000002a, - GDS_PERF_SEL_SE1_SH0_GDS_RD_OP__GFX09 = 0x0000002b, - GDS_PERF_SEL_SE1_SH0_GDS_ATOM_OP__GFX09 = 0x0000002c, - GDS_PERF_SEL_SE1_SH0_GDS_REL_OP__GFX09 = 0x0000002d, - GDS_PERF_SEL_SE1_SH0_GDS_CMPXCH_OP__GFX09 = 0x0000002e, - GDS_PERF_SEL_SE1_SH0_GDS_BYTE_OP__GFX09 = 0x0000002f, - GDS_PERF_SEL_SE1_SH0_GDS_SHORT_OP__GFX09 = 0x00000030, - GDS_PERF_SEL_SE1_SH1_NORET__GFX09 = 0x00000031, - 
GDS_PERF_SEL_SE1_SH1_RET__GFX09 = 0x00000032, - GDS_PERF_SEL_SE1_SH1_ORD_CNT__GFX09 = 0x00000033, - GDS_PERF_SEL_SE1_SH1_2COMP_REQ__GFX09 = 0x00000034, - GDS_PERF_SEL_SE1_SH1_ORD_WAVE_VALID__GFX09 = 0x00000035, - GDS_PERF_SEL_SE1_SH1_GDS_DATA_VALID__GFX09 = 0x00000036, - GDS_PERF_SEL_SE1_SH1_GDS_STALL_BY_ORD__GFX09 = 0x00000037, - GDS_PERF_SEL_SE1_SH1_GDS_WR_OP__GFX09 = 0x00000038, - GDS_PERF_SEL_SE1_SH1_GDS_RD_OP__GFX09 = 0x00000039, - GDS_PERF_SEL_SE1_SH1_GDS_ATOM_OP__GFX09 = 0x0000003a, - GDS_PERF_SEL_SE1_SH1_GDS_REL_OP__GFX09 = 0x0000003b, - GDS_PERF_SEL_SE1_SH1_GDS_CMPXCH_OP__GFX09 = 0x0000003c, - GDS_PERF_SEL_SE1_SH1_GDS_BYTE_OP__GFX09 = 0x0000003d, - GDS_PERF_SEL_SE1_SH1_GDS_SHORT_OP__GFX09 = 0x0000003e, - GDS_PERF_SEL_SE2_SH0_NORET__GFX09 = 0x0000003f, - GDS_PERF_SEL_SE2_SH0_RET__GFX09 = 0x00000040, - GDS_PERF_SEL_SE2_SH0_ORD_CNT__GFX09 = 0x00000041, - GDS_PERF_SEL_SE2_SH0_2COMP_REQ__GFX09 = 0x00000042, - GDS_PERF_SEL_SE2_SH0_ORD_WAVE_VALID__GFX09 = 0x00000043, - GDS_PERF_SEL_SE2_SH0_GDS_DATA_VALID__GFX09 = 0x00000044, - GDS_PERF_SEL_SE2_SH0_GDS_STALL_BY_ORD__GFX09 = 0x00000045, - GDS_PERF_SEL_SE2_SH0_GDS_WR_OP__GFX09 = 0x00000046, - GDS_PERF_SEL_SE2_SH0_GDS_RD_OP__GFX09 = 0x00000047, - GDS_PERF_SEL_SE2_SH0_GDS_ATOM_OP__GFX09 = 0x00000048, - GDS_PERF_SEL_SE2_SH0_GDS_REL_OP__GFX09 = 0x00000049, - GDS_PERF_SEL_SE2_SH0_GDS_CMPXCH_OP__GFX09 = 0x0000004a, - GDS_PERF_SEL_SE2_SH0_GDS_BYTE_OP__GFX09 = 0x0000004b, - GDS_PERF_SEL_SE2_SH0_GDS_SHORT_OP__GFX09 = 0x0000004c, - GDS_PERF_SEL_SE2_SH1_NORET__GFX09 = 0x0000004d, - GDS_PERF_SEL_SE2_SH1_RET__GFX09 = 0x0000004e, - GDS_PERF_SEL_SE2_SH1_ORD_CNT__GFX09 = 0x0000004f, - GDS_PERF_SEL_SE2_SH1_2COMP_REQ__GFX09 = 0x00000050, - GDS_PERF_SEL_SE2_SH1_ORD_WAVE_VALID__GFX09 = 0x00000051, - GDS_PERF_SEL_SE2_SH1_GDS_DATA_VALID__GFX09 = 0x00000052, - GDS_PERF_SEL_SE2_SH1_GDS_STALL_BY_ORD__GFX09 = 0x00000053, - GDS_PERF_SEL_SE2_SH1_GDS_WR_OP__GFX09 = 0x00000054, - GDS_PERF_SEL_SE2_SH1_GDS_RD_OP__GFX09 = 0x00000055, - 
GDS_PERF_SEL_SE2_SH1_GDS_ATOM_OP__GFX09 = 0x00000056, - GDS_PERF_SEL_SE2_SH1_GDS_REL_OP__GFX09 = 0x00000057, - GDS_PERF_SEL_SE2_SH1_GDS_CMPXCH_OP__GFX09 = 0x00000058, - GDS_PERF_SEL_SE2_SH1_GDS_BYTE_OP__GFX09 = 0x00000059, - GDS_PERF_SEL_SE2_SH1_GDS_SHORT_OP__GFX09 = 0x0000005a, - GDS_PERF_SEL_SE3_SH0_NORET__GFX09 = 0x0000005b, - GDS_PERF_SEL_SE3_SH0_RET__GFX09 = 0x0000005c, - GDS_PERF_SEL_SE3_SH0_ORD_CNT__GFX09 = 0x0000005d, - GDS_PERF_SEL_SE3_SH0_2COMP_REQ__GFX09 = 0x0000005e, - GDS_PERF_SEL_SE3_SH0_ORD_WAVE_VALID__GFX09 = 0x0000005f, - GDS_PERF_SEL_SE3_SH0_GDS_DATA_VALID__GFX09 = 0x00000060, - GDS_PERF_SEL_SE3_SH0_GDS_STALL_BY_ORD__GFX09 = 0x00000061, - GDS_PERF_SEL_SE3_SH0_GDS_WR_OP__GFX09 = 0x00000062, - GDS_PERF_SEL_SE3_SH0_GDS_RD_OP__GFX09 = 0x00000063, - GDS_PERF_SEL_SE3_SH0_GDS_ATOM_OP__GFX09 = 0x00000064, - GDS_PERF_SEL_SE3_SH0_GDS_REL_OP__GFX09 = 0x00000065, - GDS_PERF_SEL_SE3_SH0_GDS_CMPXCH_OP__GFX09 = 0x00000066, - GDS_PERF_SEL_SE3_SH0_GDS_BYTE_OP__GFX09 = 0x00000067, - GDS_PERF_SEL_SE3_SH0_GDS_SHORT_OP__GFX09 = 0x00000068, - GDS_PERF_SEL_SE3_SH1_NORET__GFX09 = 0x00000069, - GDS_PERF_SEL_SE3_SH1_RET__GFX09 = 0x0000006a, - GDS_PERF_SEL_SE3_SH1_ORD_CNT__GFX09 = 0x0000006b, - GDS_PERF_SEL_SE3_SH1_2COMP_REQ__GFX09 = 0x0000006c, - GDS_PERF_SEL_SE3_SH1_ORD_WAVE_VALID__GFX09 = 0x0000006d, - GDS_PERF_SEL_SE3_SH1_GDS_DATA_VALID__GFX09 = 0x0000006e, - GDS_PERF_SEL_SE3_SH1_GDS_STALL_BY_ORD__GFX09 = 0x0000006f, - GDS_PERF_SEL_SE3_SH1_GDS_WR_OP__GFX09 = 0x00000070, - GDS_PERF_SEL_SE3_SH1_GDS_RD_OP__GFX09 = 0x00000071, - GDS_PERF_SEL_SE3_SH1_GDS_ATOM_OP__GFX09 = 0x00000072, - GDS_PERF_SEL_SE3_SH1_GDS_REL_OP__GFX09 = 0x00000073, - GDS_PERF_SEL_SE3_SH1_GDS_CMPXCH_OP__GFX09 = 0x00000074, - GDS_PERF_SEL_SE3_SH1_GDS_BYTE_OP__GFX09 = 0x00000075, - GDS_PERF_SEL_SE3_SH1_GDS_SHORT_OP__GFX09 = 0x00000076, - GDS_PERF_SEL_GWS_RELEASED__GFX09 = 0x00000077, - GDS_PERF_SEL_GWS_BYPASS__GFX09 = 0x00000078, - GDS_PERF_SEL_DS_ADDR_CONFL__GFX101 = 0x00000000, - 
GDS_PERF_SEL_DS_BANK_CONFL__GFX101 = 0x00000001, - GDS_PERF_SEL_WBUF_FLUSH__GFX101 = 0x00000002, - GDS_PERF_SEL_WR_COMP__GFX101 = 0x00000003, - GDS_PERF_SEL_WBUF_WR__GFX101 = 0x00000004, - GDS_PERF_SEL_RBUF_HIT__GFX101 = 0x00000005, - GDS_PERF_SEL_RBUF_MISS__GFX101 = 0x00000006, - GDS_PERF_SEL_SE0_SH0_NORET__GFX101 = 0x00000007, - GDS_PERF_SEL_SE0_SH0_RET__GFX101 = 0x00000008, - GDS_PERF_SEL_SE0_SH0_ORD_CNT__GFX101 = 0x00000009, - GDS_PERF_SEL_SE0_SH0_2COMP_REQ__GFX101 = 0x0000000a, - GDS_PERF_SEL_SE0_SH0_ORD_WAVE_VALID__GFX101 = 0x0000000b, - GDS_PERF_SEL_SE0_SH0_GDS_DATA_VALID__GFX101 = 0x0000000c, - GDS_PERF_SEL_SE0_SH0_GDS_STALL_BY_ORD__GFX101 = 0x0000000d, - GDS_PERF_SEL_SE0_SH0_GDS_WR_OP__GFX101 = 0x0000000e, - GDS_PERF_SEL_SE0_SH0_GDS_RD_OP__GFX101 = 0x0000000f, - GDS_PERF_SEL_SE0_SH0_GDS_ATOM_OP__GFX101 = 0x00000010, - GDS_PERF_SEL_SE0_SH0_GDS_REL_OP__GFX101 = 0x00000011, - GDS_PERF_SEL_SE0_SH0_GDS_CMPXCH_OP__GFX101 = 0x00000012, - GDS_PERF_SEL_SE0_SH0_GDS_BYTE_OP__GFX101 = 0x00000013, - GDS_PERF_SEL_SE0_SH0_GDS_SHORT_OP__GFX101 = 0x00000014, - GDS_PERF_SEL_SE0_SH1_NORET__GFX101 = 0x00000015, - GDS_PERF_SEL_SE0_SH1_RET__GFX101 = 0x00000016, - GDS_PERF_SEL_SE0_SH1_ORD_CNT__GFX101 = 0x00000017, - GDS_PERF_SEL_SE0_SH1_2COMP_REQ__GFX101 = 0x00000018, - GDS_PERF_SEL_SE0_SH1_ORD_WAVE_VALID__GFX101 = 0x00000019, - GDS_PERF_SEL_SE0_SH1_GDS_DATA_VALID__GFX101 = 0x0000001a, - GDS_PERF_SEL_SE0_SH1_GDS_STALL_BY_ORD__GFX101 = 0x0000001b, - GDS_PERF_SEL_SE0_SH1_GDS_WR_OP__GFX101 = 0x0000001c, - GDS_PERF_SEL_SE0_SH1_GDS_RD_OP__GFX101 = 0x0000001d, - GDS_PERF_SEL_SE0_SH1_GDS_ATOM_OP__GFX101 = 0x0000001e, - GDS_PERF_SEL_SE0_SH1_GDS_REL_OP__GFX101 = 0x0000001f, - GDS_PERF_SEL_SE0_SH1_GDS_CMPXCH_OP__GFX101 = 0x00000020, - GDS_PERF_SEL_SE0_SH1_GDS_BYTE_OP__GFX101 = 0x00000021, - GDS_PERF_SEL_SE0_SH1_GDS_SHORT_OP__GFX101 = 0x00000022, - GDS_PERF_SEL_SE1_SH0_NORET__GFX101 = 0x00000023, - GDS_PERF_SEL_SE1_SH0_RET__GFX101 = 0x00000024, - GDS_PERF_SEL_SE1_SH0_ORD_CNT__GFX101 = 
0x00000025, - GDS_PERF_SEL_SE1_SH0_2COMP_REQ__GFX101 = 0x00000026, - GDS_PERF_SEL_SE1_SH0_ORD_WAVE_VALID__GFX101 = 0x00000027, - GDS_PERF_SEL_SE1_SH0_GDS_DATA_VALID__GFX101 = 0x00000028, - GDS_PERF_SEL_SE1_SH0_GDS_STALL_BY_ORD__GFX101 = 0x00000029, - GDS_PERF_SEL_SE1_SH0_GDS_WR_OP__GFX101 = 0x0000002a, - GDS_PERF_SEL_SE1_SH0_GDS_RD_OP__GFX101 = 0x0000002b, - GDS_PERF_SEL_SE1_SH0_GDS_ATOM_OP__GFX101 = 0x0000002c, - GDS_PERF_SEL_SE1_SH0_GDS_REL_OP__GFX101 = 0x0000002d, - GDS_PERF_SEL_SE1_SH0_GDS_CMPXCH_OP__GFX101 = 0x0000002e, - GDS_PERF_SEL_SE1_SH0_GDS_BYTE_OP__GFX101 = 0x0000002f, - GDS_PERF_SEL_SE1_SH0_GDS_SHORT_OP__GFX101 = 0x00000030, - GDS_PERF_SEL_SE1_SH1_NORET__GFX101 = 0x00000031, - GDS_PERF_SEL_SE1_SH1_RET__GFX101 = 0x00000032, - GDS_PERF_SEL_SE1_SH1_ORD_CNT__GFX101 = 0x00000033, - GDS_PERF_SEL_SE1_SH1_2COMP_REQ__GFX101 = 0x00000034, - GDS_PERF_SEL_SE1_SH1_ORD_WAVE_VALID__GFX101 = 0x00000035, - GDS_PERF_SEL_SE1_SH1_GDS_DATA_VALID__GFX101 = 0x00000036, - GDS_PERF_SEL_SE1_SH1_GDS_STALL_BY_ORD__GFX101 = 0x00000037, - GDS_PERF_SEL_SE1_SH1_GDS_WR_OP__GFX101 = 0x00000038, - GDS_PERF_SEL_SE1_SH1_GDS_RD_OP__GFX101 = 0x00000039, - GDS_PERF_SEL_SE1_SH1_GDS_ATOM_OP__GFX101 = 0x0000003a, - GDS_PERF_SEL_SE1_SH1_GDS_REL_OP__GFX101 = 0x0000003b, - GDS_PERF_SEL_SE1_SH1_GDS_CMPXCH_OP__GFX101 = 0x0000003c, - GDS_PERF_SEL_SE1_SH1_GDS_BYTE_OP__GFX101 = 0x0000003d, - GDS_PERF_SEL_SE1_SH1_GDS_SHORT_OP__GFX101 = 0x0000003e, - GDS_PERF_SEL_SE2_SH0_NORET__GFX101 = 0x0000003f, - GDS_PERF_SEL_SE2_SH0_RET__GFX101 = 0x00000040, - GDS_PERF_SEL_SE2_SH0_ORD_CNT__GFX101 = 0x00000041, - GDS_PERF_SEL_SE2_SH0_2COMP_REQ__GFX101 = 0x00000042, - GDS_PERF_SEL_SE2_SH0_ORD_WAVE_VALID__GFX101 = 0x00000043, - GDS_PERF_SEL_SE2_SH0_GDS_DATA_VALID__GFX101 = 0x00000044, - GDS_PERF_SEL_SE2_SH0_GDS_STALL_BY_ORD__GFX101 = 0x00000045, - GDS_PERF_SEL_SE2_SH0_GDS_WR_OP__GFX101 = 0x00000046, - GDS_PERF_SEL_SE2_SH0_GDS_RD_OP__GFX101 = 0x00000047, - GDS_PERF_SEL_SE2_SH0_GDS_ATOM_OP__GFX101 = 0x00000048, - 
GDS_PERF_SEL_SE2_SH0_GDS_REL_OP__GFX101 = 0x00000049, - GDS_PERF_SEL_SE2_SH0_GDS_CMPXCH_OP__GFX101 = 0x0000004a, - GDS_PERF_SEL_SE2_SH0_GDS_BYTE_OP__GFX101 = 0x0000004b, - GDS_PERF_SEL_SE2_SH0_GDS_SHORT_OP__GFX101 = 0x0000004c, - GDS_PERF_SEL_SE2_SH1_NORET__GFX101 = 0x0000004d, - GDS_PERF_SEL_SE2_SH1_RET__GFX101 = 0x0000004e, - GDS_PERF_SEL_SE2_SH1_ORD_CNT__GFX101 = 0x0000004f, - GDS_PERF_SEL_SE2_SH1_2COMP_REQ__GFX101 = 0x00000050, - GDS_PERF_SEL_SE2_SH1_ORD_WAVE_VALID__GFX101 = 0x00000051, - GDS_PERF_SEL_SE2_SH1_GDS_DATA_VALID__GFX101 = 0x00000052, - GDS_PERF_SEL_SE2_SH1_GDS_STALL_BY_ORD__GFX101 = 0x00000053, - GDS_PERF_SEL_SE2_SH1_GDS_WR_OP__GFX101 = 0x00000054, - GDS_PERF_SEL_SE2_SH1_GDS_RD_OP__GFX101 = 0x00000055, - GDS_PERF_SEL_SE2_SH1_GDS_ATOM_OP__GFX101 = 0x00000056, - GDS_PERF_SEL_SE2_SH1_GDS_REL_OP__GFX101 = 0x00000057, - GDS_PERF_SEL_SE2_SH1_GDS_CMPXCH_OP__GFX101 = 0x00000058, - GDS_PERF_SEL_SE2_SH1_GDS_BYTE_OP__GFX101 = 0x00000059, - GDS_PERF_SEL_SE2_SH1_GDS_SHORT_OP__GFX101 = 0x0000005a, - GDS_PERF_SEL_SE3_SH0_NORET__GFX101 = 0x0000005b, - GDS_PERF_SEL_SE3_SH0_RET__GFX101 = 0x0000005c, - GDS_PERF_SEL_SE3_SH0_ORD_CNT__GFX101 = 0x0000005d, - GDS_PERF_SEL_SE3_SH0_2COMP_REQ__GFX101 = 0x0000005e, - GDS_PERF_SEL_SE3_SH0_ORD_WAVE_VALID__GFX101 = 0x0000005f, - GDS_PERF_SEL_SE3_SH0_GDS_DATA_VALID__GFX101 = 0x00000060, - GDS_PERF_SEL_SE3_SH0_GDS_STALL_BY_ORD__GFX101 = 0x00000061, - GDS_PERF_SEL_SE3_SH0_GDS_WR_OP__GFX101 = 0x00000062, - GDS_PERF_SEL_SE3_SH0_GDS_RD_OP__GFX101 = 0x00000063, - GDS_PERF_SEL_SE3_SH0_GDS_ATOM_OP__GFX101 = 0x00000064, - GDS_PERF_SEL_SE3_SH0_GDS_REL_OP__GFX101 = 0x00000065, - GDS_PERF_SEL_SE3_SH0_GDS_CMPXCH_OP__GFX101 = 0x00000066, - GDS_PERF_SEL_SE3_SH0_GDS_BYTE_OP__GFX101 = 0x00000067, - GDS_PERF_SEL_SE3_SH0_GDS_SHORT_OP__GFX101 = 0x00000068, - GDS_PERF_SEL_SE3_SH1_NORET__GFX101 = 0x00000069, - GDS_PERF_SEL_SE3_SH1_RET__GFX101 = 0x0000006a, - GDS_PERF_SEL_SE3_SH1_ORD_CNT__GFX101 = 0x0000006b, - GDS_PERF_SEL_SE3_SH1_2COMP_REQ__GFX101 = 
0x0000006c, - GDS_PERF_SEL_SE3_SH1_ORD_WAVE_VALID__GFX101 = 0x0000006d, - GDS_PERF_SEL_SE3_SH1_GDS_DATA_VALID__GFX101 = 0x0000006e, - GDS_PERF_SEL_SE3_SH1_GDS_STALL_BY_ORD__GFX101 = 0x0000006f, - GDS_PERF_SEL_SE3_SH1_GDS_WR_OP__GFX101 = 0x00000070, - GDS_PERF_SEL_SE3_SH1_GDS_RD_OP__GFX101 = 0x00000071, - GDS_PERF_SEL_SE3_SH1_GDS_ATOM_OP__GFX101 = 0x00000072, - GDS_PERF_SEL_SE3_SH1_GDS_REL_OP__GFX101 = 0x00000073, - GDS_PERF_SEL_SE3_SH1_GDS_CMPXCH_OP__GFX101 = 0x00000074, - GDS_PERF_SEL_SE3_SH1_GDS_BYTE_OP__GFX101 = 0x00000075, - GDS_PERF_SEL_SE3_SH1_GDS_SHORT_OP__GFX101 = 0x00000076, - GDS_PERF_SEL_GWS_RELEASED__GFX101 = 0x00000077, - GDS_PERF_SEL_GWS_BYPASS__GFX101 = 0x00000078, - GDS_PERF_SEL_DS_ADDR_CONFL__GFX103 = 0x00000000, - GDS_PERF_SEL_DS_BANK_CONFL__GFX103 = 0x00000001, - GDS_PERF_SEL_WBUF_FLUSH__GFX103 = 0x00000002, - GDS_PERF_SEL_WR_COMP__GFX103 = 0x00000003, - GDS_PERF_SEL_WBUF_WR__GFX103 = 0x00000004, - GDS_PERF_SEL_RBUF_HIT__GFX103 = 0x00000005, - GDS_PERF_SEL_RBUF_MISS__GFX103 = 0x00000006, - GDS_PERF_SEL_SE0_SH0_NORET__GFX103 = 0x00000007, - GDS_PERF_SEL_SE0_SH0_RET__GFX103 = 0x00000008, - GDS_PERF_SEL_SE0_SH0_ORD_CNT__GFX103 = 0x00000009, - GDS_PERF_SEL_SE0_SH0_2COMP_REQ__GFX103 = 0x0000000a, - GDS_PERF_SEL_SE0_SH0_ORD_WAVE_VALID__GFX103 = 0x0000000b, - GDS_PERF_SEL_SE0_SH0_GDS_DATA_VALID__GFX103 = 0x0000000c, - GDS_PERF_SEL_SE0_SH0_GDS_STALL_BY_ORD__GFX103 = 0x0000000d, - GDS_PERF_SEL_SE0_SH0_GDS_WR_OP__GFX103 = 0x0000000e, - GDS_PERF_SEL_SE0_SH0_GDS_RD_OP__GFX103 = 0x0000000f, - GDS_PERF_SEL_SE0_SH0_GDS_ATOM_OP__GFX103 = 0x00000010, - GDS_PERF_SEL_SE0_SH0_GDS_REL_OP__GFX103 = 0x00000011, - GDS_PERF_SEL_SE0_SH0_GDS_CMPXCH_OP__GFX103 = 0x00000012, - GDS_PERF_SEL_SE0_SH0_GDS_BYTE_OP__GFX103 = 0x00000013, - GDS_PERF_SEL_SE0_SH0_GDS_SHORT_OP__GFX103 = 0x00000014, - GDS_PERF_SEL_SE0_SH1_NORET__GFX103 = 0x00000015, - GDS_PERF_SEL_SE0_SH1_RET__GFX103 = 0x00000016, - GDS_PERF_SEL_SE0_SH1_ORD_CNT__GFX103 = 0x00000017, - 
GDS_PERF_SEL_SE0_SH1_2COMP_REQ__GFX103 = 0x00000018, - GDS_PERF_SEL_SE0_SH1_ORD_WAVE_VALID__GFX103 = 0x00000019, - GDS_PERF_SEL_SE0_SH1_GDS_DATA_VALID__GFX103 = 0x0000001a, - GDS_PERF_SEL_SE0_SH1_GDS_STALL_BY_ORD__GFX103 = 0x0000001b, - GDS_PERF_SEL_SE0_SH1_GDS_WR_OP__GFX103 = 0x0000001c, - GDS_PERF_SEL_SE0_SH1_GDS_RD_OP__GFX103 = 0x0000001d, - GDS_PERF_SEL_SE0_SH1_GDS_ATOM_OP__GFX103 = 0x0000001e, - GDS_PERF_SEL_SE0_SH1_GDS_REL_OP__GFX103 = 0x0000001f, - GDS_PERF_SEL_SE0_SH1_GDS_CMPXCH_OP__GFX103 = 0x00000020, - GDS_PERF_SEL_SE0_SH1_GDS_BYTE_OP__GFX103 = 0x00000021, - GDS_PERF_SEL_SE0_SH1_GDS_SHORT_OP__GFX103 = 0x00000022, - GDS_PERF_SEL_SE1_SH0_NORET__GFX103 = 0x00000023, - GDS_PERF_SEL_SE1_SH0_RET__GFX103 = 0x00000024, - GDS_PERF_SEL_SE1_SH0_ORD_CNT__GFX103 = 0x00000025, - GDS_PERF_SEL_SE1_SH0_2COMP_REQ__GFX103 = 0x00000026, - GDS_PERF_SEL_SE1_SH0_ORD_WAVE_VALID__GFX103 = 0x00000027, - GDS_PERF_SEL_SE1_SH0_GDS_DATA_VALID__GFX103 = 0x00000028, - GDS_PERF_SEL_SE1_SH0_GDS_STALL_BY_ORD__GFX103 = 0x00000029, - GDS_PERF_SEL_SE1_SH0_GDS_WR_OP__GFX103 = 0x0000002a, - GDS_PERF_SEL_SE1_SH0_GDS_RD_OP__GFX103 = 0x0000002b, - GDS_PERF_SEL_SE1_SH0_GDS_ATOM_OP__GFX103 = 0x0000002c, - GDS_PERF_SEL_SE1_SH0_GDS_REL_OP__GFX103 = 0x0000002d, - GDS_PERF_SEL_SE1_SH0_GDS_CMPXCH_OP__GFX103 = 0x0000002e, - GDS_PERF_SEL_SE1_SH0_GDS_BYTE_OP__GFX103 = 0x0000002f, - GDS_PERF_SEL_SE1_SH0_GDS_SHORT_OP__GFX103 = 0x00000030, - GDS_PERF_SEL_SE1_SH1_NORET__GFX103 = 0x00000031, - GDS_PERF_SEL_SE1_SH1_RET__GFX103 = 0x00000032, - GDS_PERF_SEL_SE1_SH1_ORD_CNT__GFX103 = 0x00000033, - GDS_PERF_SEL_SE1_SH1_2COMP_REQ__GFX103 = 0x00000034, - GDS_PERF_SEL_SE1_SH1_ORD_WAVE_VALID__GFX103 = 0x00000035, - GDS_PERF_SEL_SE1_SH1_GDS_DATA_VALID__GFX103 = 0x00000036, - GDS_PERF_SEL_SE1_SH1_GDS_STALL_BY_ORD__GFX103 = 0x00000037, - GDS_PERF_SEL_SE1_SH1_GDS_WR_OP__GFX103 = 0x00000038, - GDS_PERF_SEL_SE1_SH1_GDS_RD_OP__GFX103 = 0x00000039, - GDS_PERF_SEL_SE1_SH1_GDS_ATOM_OP__GFX103 = 0x0000003a, - 
GDS_PERF_SEL_SE1_SH1_GDS_REL_OP__GFX103 = 0x0000003b, - GDS_PERF_SEL_SE1_SH1_GDS_CMPXCH_OP__GFX103 = 0x0000003c, - GDS_PERF_SEL_SE1_SH1_GDS_BYTE_OP__GFX103 = 0x0000003d, - GDS_PERF_SEL_SE1_SH1_GDS_SHORT_OP__GFX103 = 0x0000003e, - GDS_PERF_SEL_SE2_SH0_NORET__GFX103 = 0x0000003f, - GDS_PERF_SEL_SE2_SH0_RET__GFX103 = 0x00000040, - GDS_PERF_SEL_SE2_SH0_ORD_CNT__GFX103 = 0x00000041, - GDS_PERF_SEL_SE2_SH0_2COMP_REQ__GFX103 = 0x00000042, - GDS_PERF_SEL_SE2_SH0_ORD_WAVE_VALID__GFX103 = 0x00000043, - GDS_PERF_SEL_SE2_SH0_GDS_DATA_VALID__GFX103 = 0x00000044, - GDS_PERF_SEL_SE2_SH0_GDS_STALL_BY_ORD__GFX103 = 0x00000045, - GDS_PERF_SEL_SE2_SH0_GDS_WR_OP__GFX103 = 0x00000046, - GDS_PERF_SEL_SE2_SH0_GDS_RD_OP__GFX103 = 0x00000047, - GDS_PERF_SEL_SE2_SH0_GDS_ATOM_OP__GFX103 = 0x00000048, - GDS_PERF_SEL_SE2_SH0_GDS_REL_OP__GFX103 = 0x00000049, - GDS_PERF_SEL_SE2_SH0_GDS_CMPXCH_OP__GFX103 = 0x0000004a, - GDS_PERF_SEL_SE2_SH0_GDS_BYTE_OP__GFX103 = 0x0000004b, - GDS_PERF_SEL_SE2_SH0_GDS_SHORT_OP__GFX103 = 0x0000004c, - GDS_PERF_SEL_SE2_SH1_NORET__GFX103 = 0x0000004d, - GDS_PERF_SEL_SE2_SH1_RET__GFX103 = 0x0000004e, - GDS_PERF_SEL_SE2_SH1_ORD_CNT__GFX103 = 0x0000004f, - GDS_PERF_SEL_SE2_SH1_2COMP_REQ__GFX103 = 0x00000050, - GDS_PERF_SEL_SE2_SH1_ORD_WAVE_VALID__GFX103 = 0x00000051, - GDS_PERF_SEL_SE2_SH1_GDS_DATA_VALID__GFX103 = 0x00000052, - GDS_PERF_SEL_SE2_SH1_GDS_STALL_BY_ORD__GFX103 = 0x00000053, - GDS_PERF_SEL_SE2_SH1_GDS_WR_OP__GFX103 = 0x00000054, - GDS_PERF_SEL_SE2_SH1_GDS_RD_OP__GFX103 = 0x00000055, - GDS_PERF_SEL_SE2_SH1_GDS_ATOM_OP__GFX103 = 0x00000056, - GDS_PERF_SEL_SE2_SH1_GDS_REL_OP__GFX103 = 0x00000057, - GDS_PERF_SEL_SE2_SH1_GDS_CMPXCH_OP__GFX103 = 0x00000058, - GDS_PERF_SEL_SE2_SH1_GDS_BYTE_OP__GFX103 = 0x00000059, - GDS_PERF_SEL_SE2_SH1_GDS_SHORT_OP__GFX103 = 0x0000005a, - GDS_PERF_SEL_SE3_SH0_NORET__GFX103 = 0x0000005b, - GDS_PERF_SEL_SE3_SH0_RET__GFX103 = 0x0000005c, - GDS_PERF_SEL_SE3_SH0_ORD_CNT__GFX103 = 0x0000005d, - GDS_PERF_SEL_SE3_SH0_2COMP_REQ__GFX103 = 
0x0000005e, - GDS_PERF_SEL_SE3_SH0_ORD_WAVE_VALID__GFX103 = 0x0000005f, - GDS_PERF_SEL_SE3_SH0_GDS_DATA_VALID__GFX103 = 0x00000060, - GDS_PERF_SEL_SE3_SH0_GDS_STALL_BY_ORD__GFX103 = 0x00000061, - GDS_PERF_SEL_SE3_SH0_GDS_WR_OP__GFX103 = 0x00000062, - GDS_PERF_SEL_SE3_SH0_GDS_RD_OP__GFX103 = 0x00000063, - GDS_PERF_SEL_SE3_SH0_GDS_ATOM_OP__GFX103 = 0x00000064, - GDS_PERF_SEL_SE3_SH0_GDS_REL_OP__GFX103 = 0x00000065, - GDS_PERF_SEL_SE3_SH0_GDS_CMPXCH_OP__GFX103 = 0x00000066, - GDS_PERF_SEL_SE3_SH0_GDS_BYTE_OP__GFX103 = 0x00000067, - GDS_PERF_SEL_SE3_SH0_GDS_SHORT_OP__GFX103 = 0x00000068, - GDS_PERF_SEL_SE3_SH1_NORET__GFX103 = 0x00000069, - GDS_PERF_SEL_SE3_SH1_RET__GFX103 = 0x0000006a, - GDS_PERF_SEL_SE3_SH1_ORD_CNT__GFX103 = 0x0000006b, - GDS_PERF_SEL_SE3_SH1_2COMP_REQ__GFX103 = 0x0000006c, - GDS_PERF_SEL_SE3_SH1_ORD_WAVE_VALID__GFX103 = 0x0000006d, - GDS_PERF_SEL_SE3_SH1_GDS_DATA_VALID__GFX103 = 0x0000006e, - GDS_PERF_SEL_SE3_SH1_GDS_STALL_BY_ORD__GFX103 = 0x0000006f, - GDS_PERF_SEL_SE3_SH1_GDS_WR_OP__GFX103 = 0x00000070, - GDS_PERF_SEL_SE3_SH1_GDS_RD_OP__GFX103 = 0x00000071, - GDS_PERF_SEL_SE3_SH1_GDS_ATOM_OP__GFX103 = 0x00000072, - GDS_PERF_SEL_SE3_SH1_GDS_REL_OP__GFX103 = 0x00000073, - GDS_PERF_SEL_SE3_SH1_GDS_CMPXCH_OP__GFX103 = 0x00000074, - GDS_PERF_SEL_SE3_SH1_GDS_BYTE_OP__GFX103 = 0x00000075, - GDS_PERF_SEL_SE3_SH1_GDS_SHORT_OP__GFX103 = 0x00000076, - GDS_PERF_SEL_GWS_RELEASED__GFX103 = 0x00000077, - GDS_PERF_SEL_GWS_BYPASS__GFX103 = 0x00000078, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GDS_PERF_SEL_WR_COMP__GFX11 = 0x00000000, - GDS_PERF_SEL_WBUF_WR__GFX11 = 0x00000001, - GDS_PERF_SEL_SE0_NORET__GFX11 = 0x00000002, - GDS_PERF_SEL_SE0_RET__GFX11 = 0x00000003, - GDS_PERF_SEL_SE0_ORD_CNT__GFX11 = 0x00000004, - GDS_PERF_SEL_SE0_2COMP_REQ__GFX11 = 0x00000005, - GDS_PERF_SEL_SE0_ORD_WAVE_VALID__GFX11 = 0x00000006, - GDS_PERF_SEL_SE0_GDS_STALL_BY_ORD__GFX11 = 0x00000007, - GDS_PERF_SEL_SE0_GDS_WR_OP__GFX11 = 0x00000008, - 
GDS_PERF_SEL_SE0_GDS_RD_OP__GFX11 = 0x00000009, - GDS_PERF_SEL_SE0_GDS_ATOM_OP__GFX11 = 0x0000000a, - GDS_PERF_SEL_SE0_GDS_REL_OP__GFX11 = 0x0000000b, - GDS_PERF_SEL_SE0_GDS_CMPXCH_OP__GFX11 = 0x0000000c, - GDS_PERF_SEL_SE0_GDS_BYTE_OP__GFX11 = 0x0000000d, - GDS_PERF_SEL_SE0_GDS_SHORT_OP__GFX11 = 0x0000000e, - GDS_PERF_SEL_SE1_NORET__GFX11 = 0x0000000f, - GDS_PERF_SEL_SE1_RET__GFX11 = 0x00000010, - GDS_PERF_SEL_SE1_ORD_CNT__GFX11 = 0x00000011, - GDS_PERF_SEL_SE1_2COMP_REQ__GFX11 = 0x00000012, - GDS_PERF_SEL_SE1_ORD_WAVE_VALID__GFX11 = 0x00000013, - GDS_PERF_SEL_SE1_GDS_STALL_BY_ORD__GFX11 = 0x00000014, - GDS_PERF_SEL_SE1_GDS_WR_OP__GFX11 = 0x00000015, - GDS_PERF_SEL_SE1_GDS_RD_OP__GFX11 = 0x00000016, - GDS_PERF_SEL_SE1_GDS_ATOM_OP__GFX11 = 0x00000017, - GDS_PERF_SEL_SE1_GDS_REL_OP__GFX11 = 0x00000018, - GDS_PERF_SEL_SE1_GDS_CMPXCH_OP__GFX11 = 0x00000019, - GDS_PERF_SEL_SE1_GDS_BYTE_OP__GFX11 = 0x0000001a, - GDS_PERF_SEL_SE1_GDS_SHORT_OP__GFX11 = 0x0000001b, - GDS_PERF_SEL_SE2_NORET__GFX11 = 0x0000001c, - GDS_PERF_SEL_SE2_RET__GFX11 = 0x0000001d, - GDS_PERF_SEL_SE2_ORD_CNT__GFX11 = 0x0000001e, - GDS_PERF_SEL_SE2_2COMP_REQ__GFX11 = 0x0000001f, - GDS_PERF_SEL_SE2_ORD_WAVE_VALID__GFX11 = 0x00000020, - GDS_PERF_SEL_SE2_GDS_STALL_BY_ORD__GFX11 = 0x00000021, - GDS_PERF_SEL_SE2_GDS_WR_OP__GFX11 = 0x00000022, - GDS_PERF_SEL_SE2_GDS_RD_OP__GFX11 = 0x00000023, - GDS_PERF_SEL_SE2_GDS_ATOM_OP__GFX11 = 0x00000024, - GDS_PERF_SEL_SE2_GDS_REL_OP__GFX11 = 0x00000025, - GDS_PERF_SEL_SE2_GDS_CMPXCH_OP__GFX11 = 0x00000026, - GDS_PERF_SEL_SE2_GDS_BYTE_OP__GFX11 = 0x00000027, - GDS_PERF_SEL_SE2_GDS_SHORT_OP__GFX11 = 0x00000028, - GDS_PERF_SEL_SE3_NORET__GFX11 = 0x00000029, - GDS_PERF_SEL_SE3_RET__GFX11 = 0x0000002a, - GDS_PERF_SEL_SE3_ORD_CNT__GFX11 = 0x0000002b, - GDS_PERF_SEL_SE3_2COMP_REQ__GFX11 = 0x0000002c, - GDS_PERF_SEL_SE3_ORD_WAVE_VALID__GFX11 = 0x0000002d, - GDS_PERF_SEL_SE3_GDS_STALL_BY_ORD__GFX11 = 0x0000002e, - GDS_PERF_SEL_SE3_GDS_WR_OP__GFX11 = 0x0000002f, - 
GDS_PERF_SEL_SE3_GDS_RD_OP__GFX11 = 0x00000030, - GDS_PERF_SEL_SE3_GDS_ATOM_OP__GFX11 = 0x00000031, - GDS_PERF_SEL_SE3_GDS_REL_OP__GFX11 = 0x00000032, - GDS_PERF_SEL_SE3_GDS_CMPXCH_OP__GFX11 = 0x00000033, - GDS_PERF_SEL_SE3_GDS_BYTE_OP__GFX11 = 0x00000034, - GDS_PERF_SEL_SE3_GDS_SHORT_OP__GFX11 = 0x00000035, - GDS_PERF_SEL_SE4_NORET__GFX11 = 0x00000036, - GDS_PERF_SEL_SE4_RET__GFX11 = 0x00000037, - GDS_PERF_SEL_SE4_ORD_CNT__GFX11 = 0x00000038, - GDS_PERF_SEL_SE4_2COMP_REQ__GFX11 = 0x00000039, - GDS_PERF_SEL_SE4_ORD_WAVE_VALID__GFX11 = 0x0000003a, - GDS_PERF_SEL_SE4_GDS_STALL_BY_ORD__GFX11 = 0x0000003b, - GDS_PERF_SEL_SE4_GDS_WR_OP__GFX11 = 0x0000003c, - GDS_PERF_SEL_SE4_GDS_RD_OP__GFX11 = 0x0000003d, - GDS_PERF_SEL_SE4_GDS_ATOM_OP__GFX11 = 0x0000003e, - GDS_PERF_SEL_SE4_GDS_REL_OP__GFX11 = 0x0000003f, - GDS_PERF_SEL_SE4_GDS_CMPXCH_OP__GFX11 = 0x00000040, - GDS_PERF_SEL_SE4_GDS_BYTE_OP__GFX11 = 0x00000041, - GDS_PERF_SEL_SE4_GDS_SHORT_OP__GFX11 = 0x00000042, - GDS_PERF_SEL_SE5_NORET__GFX11 = 0x00000043, - GDS_PERF_SEL_SE5_RET__GFX11 = 0x00000044, - GDS_PERF_SEL_SE5_ORD_CNT__GFX11 = 0x00000045, - GDS_PERF_SEL_SE5_2COMP_REQ__GFX11 = 0x00000046, - GDS_PERF_SEL_SE5_ORD_WAVE_VALID__GFX11 = 0x00000047, - GDS_PERF_SEL_SE5_GDS_STALL_BY_ORD__GFX11 = 0x00000048, - GDS_PERF_SEL_SE5_GDS_WR_OP__GFX11 = 0x00000049, - GDS_PERF_SEL_SE5_GDS_RD_OP__GFX11 = 0x0000004a, - GDS_PERF_SEL_SE5_GDS_ATOM_OP__GFX11 = 0x0000004b, - GDS_PERF_SEL_SE5_GDS_REL_OP__GFX11 = 0x0000004c, - GDS_PERF_SEL_SE5_GDS_CMPXCH_OP__GFX11 = 0x0000004d, - GDS_PERF_SEL_SE5_GDS_BYTE_OP__GFX11 = 0x0000004e, - GDS_PERF_SEL_SE5_GDS_SHORT_OP__GFX11 = 0x0000004f, - GDS_PERF_SEL_SE6_NORET__GFX11 = 0x00000050, - GDS_PERF_SEL_SE6_RET__GFX11 = 0x00000051, - GDS_PERF_SEL_SE6_ORD_CNT__GFX11 = 0x00000052, - GDS_PERF_SEL_SE6_2COMP_REQ__GFX11 = 0x00000053, - GDS_PERF_SEL_SE6_ORD_WAVE_VALID__GFX11 = 0x00000054, - GDS_PERF_SEL_SE6_GDS_STALL_BY_ORD__GFX11 = 0x00000055, - GDS_PERF_SEL_SE6_GDS_WR_OP__GFX11 = 0x00000056, - 
GDS_PERF_SEL_SE6_GDS_RD_OP__GFX11 = 0x00000057, - GDS_PERF_SEL_SE6_GDS_ATOM_OP__GFX11 = 0x00000058, - GDS_PERF_SEL_SE6_GDS_REL_OP__GFX11 = 0x00000059, - GDS_PERF_SEL_SE6_GDS_CMPXCH_OP__GFX11 = 0x0000005a, - GDS_PERF_SEL_SE6_GDS_BYTE_OP__GFX11 = 0x0000005b, - GDS_PERF_SEL_SE6_GDS_SHORT_OP__GFX11 = 0x0000005c, - GDS_PERF_SEL_SE7_NORET__GFX11 = 0x0000005d, - GDS_PERF_SEL_SE7_RET__GFX11 = 0x0000005e, - GDS_PERF_SEL_SE7_ORD_CNT__GFX11 = 0x0000005f, - GDS_PERF_SEL_SE7_2COMP_REQ__GFX11 = 0x00000060, - GDS_PERF_SEL_SE7_ORD_WAVE_VALID__GFX11 = 0x00000061, - GDS_PERF_SEL_SE7_GDS_STALL_BY_ORD__GFX11 = 0x00000062, - GDS_PERF_SEL_SE7_GDS_WR_OP__GFX11 = 0x00000063, - GDS_PERF_SEL_SE7_GDS_RD_OP__GFX11 = 0x00000064, - GDS_PERF_SEL_SE7_GDS_ATOM_OP__GFX11 = 0x00000065, - GDS_PERF_SEL_SE7_GDS_REL_OP__GFX11 = 0x00000066, - GDS_PERF_SEL_SE7_GDS_CMPXCH_OP__GFX11 = 0x00000067, - GDS_PERF_SEL_SE7_GDS_BYTE_OP__GFX11 = 0x00000068, - GDS_PERF_SEL_SE7_GDS_SHORT_OP__GFX11 = 0x00000069, - GDS_PERF_SEL_GWS_RELEASED__GFX11 = 0x0000006a, - GDS_PERF_SEL_GWS_BYPASS__GFX11 = 0x0000006b, - GDS_PERF_SEL_SE0_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x0000006c, - GDS_PERF_SEL_SE0_GS_ORD_CNT_REL_DONE__GFX11 = 0x0000006d, - GDS_PERF_SEL_SE0_ADD_GS_REG_OP__GFX11 = 0x0000006e, - GDS_PERF_SEL_SE0_SUB_GS_REG_OP__GFX11 = 0x0000006f, - GDS_PERF_SEL_SE0_GS_WAVE_ID_VALID__GFX11 = 0x00000070, - GDS_PERF_SEL_SE1_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x00000071, - GDS_PERF_SEL_SE1_GS_ORD_CNT_REL_DONE__GFX11 = 0x00000072, - GDS_PERF_SEL_SE1_ADD_GS_REG_OP__GFX11 = 0x00000073, - GDS_PERF_SEL_SE1_SUB_GS_REG_OP__GFX11 = 0x00000074, - GDS_PERF_SEL_SE1_GS_WAVE_ID_VALID__GFX11 = 0x00000075, - GDS_PERF_SEL_SE2_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x00000076, - GDS_PERF_SEL_SE2_GS_ORD_CNT_REL_DONE__GFX11 = 0x00000077, - GDS_PERF_SEL_SE2_ADD_GS_REG_OP__GFX11 = 0x00000078, - GDS_PERF_SEL_SE2_SUB_GS_REG_OP__GFX11 = 0x00000079, - GDS_PERF_SEL_SE2_GS_WAVE_ID_VALID__GFX11 = 0x0000007a, - 
GDS_PERF_SEL_SE3_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x0000007b, - GDS_PERF_SEL_SE3_GS_ORD_CNT_REL_DONE__GFX11 = 0x0000007c, - GDS_PERF_SEL_SE3_ADD_GS_REG_OP__GFX11 = 0x0000007d, - GDS_PERF_SEL_SE3_SUB_GS_REG_OP__GFX11 = 0x0000007e, - GDS_PERF_SEL_SE3_GS_WAVE_ID_VALID__GFX11 = 0x0000007f, - GDS_PERF_SEL_SE4_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x00000080, - GDS_PERF_SEL_SE4_GS_ORD_CNT_REL_DONE__GFX11 = 0x00000081, - GDS_PERF_SEL_SE4_ADD_GS_REG_OP__GFX11 = 0x00000082, - GDS_PERF_SEL_SE4_SUB_GS_REG_OP__GFX11 = 0x00000083, - GDS_PERF_SEL_SE4_GS_WAVE_ID_VALID__GFX11 = 0x00000084, - GDS_PERF_SEL_SE5_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x00000085, - GDS_PERF_SEL_SE5_GS_ORD_CNT_REL_DONE__GFX11 = 0x00000086, - GDS_PERF_SEL_SE5_ADD_GS_REG_OP__GFX11 = 0x00000087, - GDS_PERF_SEL_SE5_SUB_GS_REG_OP__GFX11 = 0x00000088, - GDS_PERF_SEL_SE5_GS_WAVE_ID_VALID__GFX11 = 0x00000089, - GDS_PERF_SEL_SE6_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x0000008a, - GDS_PERF_SEL_SE6_GS_ORD_CNT_REL_DONE__GFX11 = 0x0000008b, - GDS_PERF_SEL_SE6_ADD_GS_REG_OP__GFX11 = 0x0000008c, - GDS_PERF_SEL_SE6_SUB_GS_REG_OP__GFX11 = 0x0000008d, - GDS_PERF_SEL_SE6_GS_WAVE_ID_VALID__GFX11 = 0x0000008e, - GDS_PERF_SEL_SE7_GS_ORD_CNT_NOT_REL_NOT_DONE__GFX11 = 0x0000008f, - GDS_PERF_SEL_SE7_GS_ORD_CNT_REL_DONE__GFX11 = 0x00000090, - GDS_PERF_SEL_SE7_ADD_GS_REG_OP__GFX11 = 0x00000091, - GDS_PERF_SEL_SE7_SUB_GS_REG_OP__GFX11 = 0x00000092, - GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11 = 0x00000093, -#endif -} GDS_PERFCOUNT_SELECT; - -constexpr unsigned int MaxGdsPerfcountSelectGfx103 = GDS_PERF_SEL_GWS_BYPASS__GFX103; -constexpr unsigned int MaxGdsPerfcountSelectGfx09 = GDS_PERF_SEL_GWS_BYPASS__GFX09; -constexpr unsigned int MaxGdsPerfcountSelectGfx101 = GDS_PERF_SEL_GWS_BYPASS__GFX101; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGdsPerfcountSelectGfx11 = GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11; -#endif - -typedef enum GE1_PERFCOUNT_SELECT { - ge1_assembler_busy = 
0x00000000, - ge1_assembler_stalled = 0x00000001, - ge1_dma_busy = 0x00000002, - ge1_dma_lat_bin_0 = 0x00000003, - ge1_dma_lat_bin_1 = 0x00000004, - ge1_dma_lat_bin_2 = 0x00000005, - ge1_dma_lat_bin_3 = 0x00000006, - ge1_dma_lat_bin_4 = 0x00000007, - ge1_dma_lat_bin_5 = 0x00000008, - ge1_dma_lat_bin_6 = 0x00000009, - ge1_dma_lat_bin_7 = 0x0000000a, - ge1_dma_return_cl0 = 0x0000000b, - ge1_bypass_fifo_full__GFX103 = 0x00000020, - ge1_vert_group_limit_hit__GFX103 = 0x00000023, - ge1_dma_return_cl1__GFX103COREPLUS = 0x0000000c, - ge1_dma_utcl1_consecutive_retry_event__GFX103COREPLUS = 0x0000000d, - ge1_dma_utcl1_request_event__GFX103COREPLUS = 0x0000000e, - ge1_dma_utcl1_retry_event__GFX103COREPLUS = 0x0000000f, - ge1_dma_utcl1_stall_event__GFX103COREPLUS = 0x00000010, - ge1_dma_utcl1_stall_utcl2_event__GFX103COREPLUS = 0x00000011, - ge1_dma_utcl1_translation_hit_event__GFX103COREPLUS = 0x00000012, - ge1_dma_utcl1_translation_miss_event__GFX103COREPLUS = 0x00000013, - ge1_assembler_dma_starved__GFX103COREPLUS = 0x00000014, - ge1_rbiu_di_fifo_stalled_p0__GFX103COREPLUS = 0x00000015, - ge1_rbiu_di_fifo_starved_p0__GFX103COREPLUS = 0x00000016, - ge1_rbiu_dr_fifo_stalled_p0__GFX103COREPLUS = 0x00000017, - ge1_rbiu_dr_fifo_starved_p0__GFX103COREPLUS = 0x00000018, - ge1_sclk_reg_vld__GFX103COREPLUS = 0x00000019, - ge1_stat_busy__GFX103COREPLUS = 0x0000001a, - ge1_stat_no_dma_busy__GFX103COREPLUS = 0x0000001b, - ge1_pipe0_to_pipe1__GFX103COREPLUS = 0x0000001c, - ge1_pipe1_to_pipe0__GFX103COREPLUS = 0x0000001d, - ge1_dma_return_size_cl0__GFX103COREPLUS = 0x0000001e, - ge1_dma_return_size_cl1__GFX103COREPLUS = 0x0000001f, - ge1_sclk_input_vld__GFX103COREPLUS = 0x00000021, - ge1_prim_group_limit_hit__GFX103COREPLUS = 0x00000022, - ge1_rbiu_di_fifo_stalled_p1__GFX103COREPLUS = 0x00000024, - ge1_rbiu_di_fifo_starved_p1__GFX103COREPLUS = 0x00000025, - ge1_rbiu_dr_fifo_stalled_p1__GFX103COREPLUS = 0x00000026, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS = 0x00000027, -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - ge1_small_draws_one_instance__GFX11 = 0x00000020, - ge1_unopt_multi_instance_draws__GFX11 = 0x00000023, -#endif -} GE1_PERFCOUNT_SELECT; - -constexpr unsigned int MaxGe1PerfcountSelectGfx103CorePlus = ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS; - -typedef enum GE2_DIST_PERFCOUNT_SELECT { - ge_dist_hs_done = 0x00000000, - ge_dist_hs_done_latency_se0 = 0x00000001, - ge_dist_hs_done_latency_se1 = 0x00000002, - ge_dist_hs_done_latency_se2 = 0x00000003, - ge_dist_hs_done_latency_se3 = 0x00000004, - ge_dist_inside_tf_bin_0__GFX103DERIVATIVE = 0x00000005, - ge_dist_inside_tf_bin_1__GFX103DERIVATIVE = 0x00000006, - ge_dist_inside_tf_bin_2__GFX103DERIVATIVE = 0x00000007, - ge_dist_inside_tf_bin_3__GFX103DERIVATIVE = 0x00000008, - ge_dist_inside_tf_bin_4__GFX103DERIVATIVE = 0x00000009, - ge_dist_inside_tf_bin_5__GFX103DERIVATIVE = 0x0000000a, - ge_dist_inside_tf_bin_6__GFX103DERIVATIVE = 0x0000000b, - ge_dist_inside_tf_bin_7__GFX103DERIVATIVE = 0x0000000c, - ge_dist_inside_tf_bin_8__GFX103DERIVATIVE = 0x0000000d, - ge_dist_null_patch__GFX103DERIVATIVE = 0x0000000e, - ge_dist_sclk_core_vld__GFX103DERIVATIVE = 0x0000000f, - ge_dist_sclk_wd_te11_vld__GFX103DERIVATIVE = 0x00000010, - ge_dist_tfreq_lat_bin_0__GFX103DERIVATIVE = 0x00000011, - ge_dist_tfreq_lat_bin_1__GFX103DERIVATIVE = 0x00000012, - ge_dist_tfreq_lat_bin_2__GFX103DERIVATIVE = 0x00000013, - ge_dist_tfreq_lat_bin_3__GFX103DERIVATIVE = 0x00000014, - ge_dist_tfreq_lat_bin_4__GFX103DERIVATIVE = 0x00000015, - ge_dist_tfreq_lat_bin_5__GFX103DERIVATIVE = 0x00000016, - ge_dist_tfreq_lat_bin_6__GFX103DERIVATIVE = 0x00000017, - ge_dist_tfreq_lat_bin_7__GFX103DERIVATIVE = 0x00000018, - ge_dist_tfreq_utcl1_consecutive_retry_event__GFX103DERIVATIVE = 0x00000019, - ge_dist_tfreq_utcl1_request_event__GFX103DERIVATIVE = 0x0000001a, - ge_dist_tfreq_utcl1_retry_event__GFX103DERIVATIVE = 0x0000001b, - ge_dist_tfreq_utcl1_stall_event__GFX103DERIVATIVE = 
0x0000001c, - ge_dist_tfreq_utcl1_stall_utcl2_event__GFX103DERIVATIVE = 0x0000001d, - ge_dist_tfreq_utcl1_translation_hit_event__GFX103DERIVATIVE = 0x0000001e, - ge_dist_tfreq_utcl1_translation_miss_event__GFX103DERIVATIVE = 0x0000001f, - ge_dist_vs_pc_stall__GFX103DERIVATIVE = 0x00000020, - ge_dist_pc_feorder_fifo_full__GFX103DERIVATIVE = 0x00000021, - ge_dist_pc_ge_manager_busy__GFX103DERIVATIVE = 0x00000022, - ge_dist_pc_req_stall_se0__GFX103DERIVATIVE = 0x00000023, - ge_dist_pc_req_stall_se1__GFX103DERIVATIVE = 0x00000024, - ge_dist_pc_req_stall_se2__GFX103DERIVATIVE = 0x00000025, - ge_dist_pc_req_stall_se3__GFX103DERIVATIVE = 0x00000026, - ge_dist_pc_space_zero__GFX103DERIVATIVE = 0x00000027, - ge_dist_sclk_input_vld__GFX103DERIVATIVE = 0x00000028, - ge_dist_reserved__GFX103DERIVATIVE = 0x00000029, - ge_dist_wd_te11_busy__GFX103DERIVATIVE = 0x0000002a, - ge_dist_te11_starved__GFX103DERIVATIVE = 0x0000002b, - ge_dist_switch_mode_stall__GFX103DERIVATIVE = 0x0000002c, - ge_all_tf_eq__GFX103DERIVATIVE = 0x0000002d, - ge_all_tf2__GFX103DERIVATIVE = 0x0000002e, - ge_all_tf3__GFX103DERIVATIVE = 0x0000002f, - ge_all_tf4__GFX103DERIVATIVE = 0x00000030, - ge_all_tf5__GFX103DERIVATIVE = 0x00000031, - ge_all_tf6__GFX103DERIVATIVE = 0x00000032, - ge_se0_te11_starved_on_hs_done__GFX103DERIVATIVE = 0x00000033, - ge_se1_te11_starved_on_hs_done__GFX103DERIVATIVE = 0x00000034, - ge_se2_te11_starved_on_hs_done__GFX103DERIVATIVE = 0x00000035, - ge_se3_te11_starved_on_hs_done__GFX103DERIVATIVE = 0x00000036, - ge_dist_op_fifo_full_starve__GFX103DERIVATIVE = 0x00000037, - ge_dist_hs_done_se0__GFX103DERIVATIVE = 0x00000038, - ge_dist_hs_done_se1__GFX103DERIVATIVE = 0x00000039, - ge_dist_hs_done_se2__GFX103DERIVATIVE = 0x0000003a, - ge_dist_hs_done_se3__GFX103DERIVATIVE = 0x0000003b, - ge_dist_hs_done_latency__GFX103DERIVATIVE = 0x0000003c, - ge_dist_distributer_busy__GFX103DERIVATIVE = 0x0000003d, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE = 0x0000003e, -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - ge_dist_hs_done_latency_se4__GFX11 = 0x00000005, - ge_dist_hs_done_latency_se5__GFX11 = 0x00000006, - ge_dist_hs_done_latency_se6__GFX11 = 0x00000007, - ge_dist_hs_done_latency_se7__GFX11 = 0x00000008, - ge_dist_inside_tf_bin_0__GFX11 = 0x00000009, - ge_dist_inside_tf_bin_1__GFX11 = 0x0000000a, - ge_dist_inside_tf_bin_2__GFX11 = 0x0000000b, - ge_dist_inside_tf_bin_3__GFX11 = 0x0000000c, - ge_dist_inside_tf_bin_4__GFX11 = 0x0000000d, - ge_dist_inside_tf_bin_5__GFX11 = 0x0000000e, - ge_dist_inside_tf_bin_6__GFX11 = 0x0000000f, - ge_dist_inside_tf_bin_7__GFX11 = 0x00000010, - ge_dist_inside_tf_bin_8__GFX11 = 0x00000011, - ge_dist_null_patch__GFX11 = 0x00000012, - ge_dist_sclk_core_vld__GFX11 = 0x00000013, - ge_dist_sclk_wd_te11_vld__GFX11 = 0x00000014, - ge_dist_tfreq_lat_bin_0__GFX11 = 0x00000015, - ge_dist_tfreq_lat_bin_1__GFX11 = 0x00000016, - ge_dist_tfreq_lat_bin_2__GFX11 = 0x00000017, - ge_dist_tfreq_lat_bin_3__GFX11 = 0x00000018, - ge_dist_tfreq_lat_bin_4__GFX11 = 0x00000019, - ge_dist_tfreq_lat_bin_5__GFX11 = 0x0000001a, - ge_dist_tfreq_lat_bin_6__GFX11 = 0x0000001b, - ge_dist_tfreq_lat_bin_7__GFX11 = 0x0000001c, - ge_dist_tfreq_utcl1_consecutive_retry_event__GFX11 = 0x0000001d, - ge_dist_tfreq_utcl1_request_event__GFX11 = 0x0000001e, - ge_dist_tfreq_utcl1_retry_event__GFX11 = 0x0000001f, - ge_dist_tfreq_utcl1_stall_event__GFX11 = 0x00000020, - ge_dist_tfreq_utcl1_stall_utcl2_event__GFX11 = 0x00000021, - ge_dist_tfreq_utcl1_translation_hit_event__GFX11 = 0x00000022, - ge_dist_tfreq_utcl1_translation_miss_event__GFX11 = 0x00000023, - ge_dist_pc_feorder_fifo_full__GFX11 = 0x00000024, - ge_dist_pc_ge_manager_busy__GFX11 = 0x00000025, - ge_dist_sclk_input_vld__GFX11 = 0x00000026, - ge_dist_wd_te11_busy__GFX11 = 0x00000027, - ge_dist_te11_starved__GFX11 = 0x00000028, - ge_dist_switch_mode_stall__GFX11 = 0x00000029, - ge_all_tf_eq__GFX11 = 0x0000002a, - ge_all_tf2__GFX11 = 0x0000002b, - 
ge_all_tf3__GFX11 = 0x0000002c, - ge_all_tf4__GFX11 = 0x0000002d, - ge_all_tf5__GFX11 = 0x0000002e, - ge_all_tf6__GFX11 = 0x0000002f, - ge_se0_te11_starved_on_hs_done__GFX11 = 0x00000030, - ge_se1_te11_starved_on_hs_done__GFX11 = 0x00000031, - ge_se2_te11_starved_on_hs_done__GFX11 = 0x00000032, - ge_se3_te11_starved_on_hs_done__GFX11 = 0x00000033, - ge_se4_te11_starved_on_hs_done__GFX11 = 0x00000034, - ge_se5_te11_starved_on_hs_done__GFX11 = 0x00000035, - ge_se6_te11_starved_on_hs_done__GFX11 = 0x00000036, - ge_se7_te11_starved_on_hs_done__GFX11 = 0x00000037, - ge_dist_op_fifo_full_starve__GFX11 = 0x00000038, - ge_dist_hs_done_se0__GFX11 = 0x00000039, - ge_dist_hs_done_se1__GFX11 = 0x0000003a, - ge_dist_hs_done_se2__GFX11 = 0x0000003b, - ge_dist_hs_done_se3__GFX11 = 0x0000003c, - ge_dist_hs_done_se4__GFX11 = 0x0000003d, - ge_dist_hs_done_se5__GFX11 = 0x0000003e, - ge_dist_hs_done_se6__GFX11 = 0x0000003f, - ge_dist_hs_done_se7__GFX11 = 0x00000040, - ge_dist_hs_done_latency__GFX11 = 0x00000041, - ge_dist_distributer_busy__GFX11 = 0x00000042, - ge_tf_ret_data_stalling_hs_done__GFX11 = 0x00000043, - ge_num_of_no_dist_patches__GFX11 = 0x00000044, - ge_num_of_donut_dist_patches__GFX11 = 0x00000045, - ge_num_of_patch_dist_patches__GFX11 = 0x00000046, - ge_num_of_se_switches_due_to_patch_accum__GFX11 = 0x00000047, - ge_num_of_se_switches_due_to_donut__GFX11 = 0x00000048, - ge_num_of_se_switches_due_to_trap__GFX11 = 0x00000049, - ge_num_of_hs_dealloc_events__GFX11 = 0x0000004a, - ge_agm_gcr_req__GFX11 = 0x0000004b, - ge_agm_gcr_tag_stall__GFX11 = 0x0000004c, - ge_agm_gcr_crd_stall__GFX11 = 0x0000004d, - ge_agm_gcr_stall__GFX11 = 0x0000004e, - ge_agm_gcr_latency__GFX11 = 0x0000004f, - ge_distclk_vld__GFX11 = 0x00000050, - ge_dist_indx_fifos_full_and_empty__GFX11 = 0x00000051, - ge_hs_done_all_tf0_se0__GFX11 = 0x00000052, - ge_hs_done_all_tf0_se1__GFX11 = 0x00000053, - ge_hs_done_all_tf0_se2__GFX11 = 0x00000054, - ge_hs_done_all_tf0_se3__GFX11 = 0x00000055, - 
ge_hs_done_all_tf0_se4__GFX11 = 0x00000056, - ge_hs_done_all_tf0_se5__GFX11 = 0x00000057, - ge_hs_done_all_tf0_se6__GFX11 = 0x00000058, - ge_hs_done_all_tf0_se7__GFX11 = 0x00000059, - ge_hs_done_all_tf1_se0__GFX11 = 0x0000005a, - ge_hs_done_all_tf1_se1__GFX11 = 0x0000005b, - ge_hs_done_all_tf1_se2__GFX11 = 0x0000005c, - ge_hs_done_all_tf1_se3__GFX11 = 0x0000005d, - ge_hs_done_all_tf1_se4__GFX11 = 0x0000005e, - ge_hs_done_all_tf1_se5__GFX11 = 0x0000005f, - ge_hs_done_all_tf1_se6__GFX11 = 0x00000060, - ge_hs_done_all_tf1_se7__GFX11 = 0x00000061, - ge_agm_gcr_req_outstanding__GFX11 = 0x00000062, - ge_agm_gcr_req_amount__GFX11 = 0x00000063, - ge_agm_gcr_combine__GFX11 = 0x00000064, -#endif -} GE2_DIST_PERFCOUNT_SELECT; - -constexpr unsigned int MaxGe2DistPerfcountSelectGfx103Derivative = ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGe2DistPerfcountSelectGfx11 = ge_agm_gcr_combine__GFX11; -#endif - -typedef enum GE2_SE_PERFCOUNT_SELECT { - ge_se_es_ring_high_water_mark__GFX103 = 0x00000008, - ge_se_es_thread_groups__GFX103 = 0x00000009, - ge_se_esthread_stalled_es_rb_full__GFX103 = 0x0000000a, - ge_se_esthread_stalled_spi_bp__GFX103 = 0x0000000b, - ge_se_esvert_stalled_gs_event__GFX103 = 0x0000000c, - ge_se_esvert_stalled_gs_tbl__GFX103 = 0x0000000d, - ge_se_esvert_stalled_gsprim__GFX103 = 0x0000000e, - ge_se_gog_busy__GFX103 = 0x0000000f, - ge_se_gog_out_indx_stalled__GFX103 = 0x00000010, - ge_se_gog_out_prim_stalled__GFX103 = 0x00000011, - ge_se_gog_vs_tbl_stalled__GFX103 = 0x00000012, - ge_se_gs_cache_hits__GFX103 = 0x00000013, - ge_se_gs_counters_avail_stalled__GFX103 = 0x00000014, - ge_se_gs_done__GFX103 = 0x00000015, - ge_se_gs_done_latency__GFX103 = 0x00000016, - ge_se_gs_issue_rtr_stalled__GFX103 = 0x00000017, - ge_se_gs_rb_space_avail_stalled__GFX103 = 0x00000018, - ge_se_gs_ring_high_water_mark__GFX103 = 0x00000019, - 
ge_se_gsprim_stalled_es_tbl__GFX103 = 0x0000001a, - ge_se_gsprim_stalled_esvert__GFX103 = 0x0000001b, - ge_se_gsprim_stalled_gs_event__GFX103 = 0x0000001c, - ge_se_gsprim_stalled_gs_tbl__GFX103 = 0x0000001d, - ge_se_gsthread_stalled__GFX103 = 0x0000001e, - ge_se_hs_tfm_stall__GFX103 = 0x0000001f, - ge_se_hs_tgs_active_high_water_mark__GFX103 = 0x00000020, - ge_se_hs_thread_groups__GFX103 = 0x00000021, - ge_se_pa0_clipp_eop__GFX103 = 0x00000022, - ge_se_pa0_clipp_eopg__GFX103 = 0x00000023, - ge_se_pa0_clipp_is_event__GFX103 = 0x00000024, - ge_se_pa0_clipp_new_vtx_vect__GFX103 = 0x00000025, - ge_se_pa0_clipp_null_prim__GFX103 = 0x00000026, - ge_se_pa0_clipp_send__GFX103 = 0x00000027, - ge_se_pa0_clipp_send_not_event__GFX103 = 0x00000028, - ge_se_pa0_clipp_stalled__GFX103 = 0x00000029, - ge_se_pa0_clipp_starved_busy__GFX103 = 0x0000002a, - ge_se_pa0_clipp_starved_after_work__GFX103 = 0x0000002b, - ge_se_pa0_clipp_valid_prim__GFX103 = 0x0000002c, - ge_se_pa0_clips_send__GFX103 = 0x0000002d, - ge_se_pa0_clips_stalled__GFX103 = 0x0000002e, - ge_se_pa0_clipv_send__GFX103 = 0x0000002f, - ge_se_pa0_clipv_stalled__GFX103 = 0x00000030, - ge_se_pa1_clipp_eop__GFX103 = 0x00000031, - ge_se_pa1_clipp_eopg__GFX103 = 0x00000032, - ge_se_pa1_clipp_is_event__GFX103 = 0x00000033, - ge_se_pa1_clipp_new_vtx_vect__GFX103 = 0x00000034, - ge_se_pa1_clipp_null_prim__GFX103 = 0x00000035, - ge_se_pa1_clipp_send__GFX103 = 0x00000036, - ge_se_pa1_clipp_send_not_event__GFX103 = 0x00000037, - ge_se_pa1_clipp_stalled__GFX103 = 0x00000038, - ge_se_pa1_clipp_starved_busy__GFX103 = 0x00000039, - ge_se_pa1_clipp_starved_after_work__GFX103 = 0x0000003a, - ge_se_pa1_clipp_valid_prim__GFX103 = 0x0000003b, - ge_se_pa1_clips_send__GFX103 = 0x0000003c, - ge_se_pa1_clips_stalled__GFX103 = 0x0000003d, - ge_se_pa1_clipv_send__GFX103 = 0x0000003e, - ge_se_pa1_clipv_stalled__GFX103 = 0x0000003f, - ge_se_reused_es_indices__GFX103 = 0x00000040, - ge_se_reused_vs_indices__GFX103 = 0x00000041, - 
ge_se_sclk_gs_vld__GFX103 = 0x00000042, - ge_se_sclk_ngg_vld__GFX103 = 0x00000043, - ge_se_sclk_te11_vld__GFX103 = 0x00000044, - ge_se_sclk_vr_vld__GFX103 = 0x00000045, - ge_se_spi_esvert_eov__GFX103 = 0x00000046, - ge_se_spi_esvert_stalled__GFX103 = 0x00000047, - ge_se_spi_esvert_starved_busy__GFX103 = 0x00000048, - ge_se_spi_esvert_valid__GFX103 = 0x00000049, - ge_se_spi_eswave_is_event__GFX103 = 0x0000004a, - ge_se_spi_eswave_send__GFX103 = 0x0000004b, - ge_se_spi_gsprim_cont__GFX103 = 0x0000004c, - ge_se_spi_gsprim_eov__GFX103 = 0x0000004d, - ge_se_spi_gsprim_stalled__GFX103 = 0x0000004e, - ge_se_spi_gsprim_starved_busy__GFX103 = 0x0000004f, - ge_se_spi_gsprim_valid__GFX103 = 0x00000050, - ge_se_spi_gssubgrp_is_event__GFX103 = 0x00000051, - ge_se_spi_gssubgrp_send__GFX103 = 0x00000052, - ge_se_spi_hsvert_eov__GFX103 = 0x00000053, - ge_se_spi_hsvert_stalled__GFX103 = 0x00000054, - ge_se_spi_hsvert_starved_busy__GFX103 = 0x00000055, - ge_se_spi_hsvert_valid__GFX103 = 0x00000056, - ge_se_spi_hswave_is_event__GFX103 = 0x00000057, - ge_se_spi_hswave_send__GFX103 = 0x00000058, - ge_se_spi_lsvert_eov__GFX103 = 0x00000059, - ge_se_spi_lsvert_stalled__GFX103 = 0x0000005a, - ge_se_spi_lsvert_starved_busy__GFX103 = 0x0000005b, - ge_se_spi_lsvert_valid__GFX103 = 0x0000005c, - ge_se_spi_hsvert_fifo_full_stall__GFX103 = 0x0000005d, - ge_se_spi_tgrp_fifo_stall__GFX103 = 0x0000005e, - ge_se_spi_vsvert_eov__GFX103 = 0x0000005f, - ge_se_spi_vsvert_send__GFX103 = 0x00000060, - ge_se_spi_vsvert_stalled__GFX103 = 0x00000061, - ge_se_spi_vsvert_starved_busy__GFX103 = 0x00000062, - ge_se_spi_vswave_is_event__GFX103 = 0x00000063, - ge_se_spi_vswave_send__GFX103 = 0x00000064, - ge_spi_hsgrp_spi_stall__GFX103 = 0x00000065, - ge_se_rcm_indicies_hit__GFX103 = 0x00000066, - ge_se_vs_cache_hits__GFX103 = 0x00000067, - ge_se_vs_done__GFX103 = 0x00000068, - ge_se_vs_table_high_water_mark__GFX103 = 0x00000069, - ge_se_vs_thread_groups__GFX103 = 0x0000006a, - ge_se_vsvert_api_send__GFX103 = 
0x0000006b, - ge_se_vsvert_ds_send__GFX103 = 0x0000006c, - ge_se_wait_for_es_done_stalled__GFX103 = 0x0000006d, - ge_se_waveid_stalled__GFX103 = 0x0000006e, - ge_se_spi_vsvert_valid__GFX103 = 0x0000006f, - ge_se_spi_gssubgrp_event_window_active__GFX103 = 0x00000070, - ge_se_hs_input_stall__GFX103 = 0x00000071, - vgt_se_pa0_clipv_starved_busy__GFX103 = 0x00000072, - vgt_se_pa0_clipv_firstvert__GFX103 = 0x00000073, - vgt_se_pa0_clips_starved_busy__GFX103 = 0x00000074, - vgt_se_pa1_clipv_starved_busy__GFX103 = 0x00000075, - vgt_se_pa1_clipv_firstvert__GFX103 = 0x00000076, - vgt_se_pa1_clips_starved_busy__GFX103 = 0x00000077, - ge_se_sending_vert_or_prim__GFX103 = 0x00000078, - ge_se_sclk_input_vld__GFX103 = 0x00000079, - ge_spi_lswave_fifo_full_stall__GFX103 = 0x0000007a, - ge_spi_hswave_fifo_full_stall__GFX103 = 0x0000007b, - ge_hs_tif_stall__GFX103 = 0x0000007c, - ge_csb_spi_bp__GFX103 = 0x0000007d, - ge_ngg_starving_for_pc_grant__GFX103 = 0x0000007e, - ge_vs_starving_for_pc_grant__GFX103 = 0x0000007f, - ge_se_strmout_stalled__GFX103 = 0x00000080, - ge_pa0_csb_eop__GFX103 = 0x00000081, - ge_pa1_csb_eop__GFX103 = 0x00000082, - ge_ngg_starved_idle__GFX103 = 0x00000083, - ge_gsprim_send__GFX103 = 0x00000084, - ge_esvert_send__GFX103 = 0x00000085, - ge_ngg_starved_after_work__GFX103 = 0x00000086, - ge_ngg_subgrp_fifo_stall__GFX103 = 0x00000087, - ge_ngg_ord_id_req_stall__GFX103 = 0x00000088, - ge_ngg_indx_bus_stall__GFX103 = 0x00000089, - ge_hs_stall_tfmm_fifo_full__GFX103 = 0x0000008a, - ge_se_cm_reading_stalled__GFX103DERIVATIVE = 0x00000000, - ge_se_cm_stalled_by_gog__GFX103DERIVATIVE = 0x00000001, - ge_se_cm_stalled_by_gsfetch_done__GFX103DERIVATIVE = 0x00000002, - ge_se_ds_cache_hits__GFX103DERIVATIVE = 0x00000003, - ge_se_ds_prims__GFX103DERIVATIVE = 0x00000004, - ge_se_es_done__GFX103DERIVATIVE = 0x00000005, - ge_se_es_done_latency__GFX103DERIVATIVE = 0x00000006, - ge_se_es_flush__GFX103DERIVATIVE = 0x00000007, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| 
CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - ge_se_ds_prims__GFX11 = 0x00000000, - ge_se_es_thread_groups__GFX11 = 0x00000001, - ge_se_esvert_stalled_gsprim__GFX11 = 0x00000002, - ge_se_hs_tfm_stall__GFX11 = 0x00000003, - ge_se_hs_tgs_active_high_water_mark__GFX11 = 0x00000004, - ge_se_hs_thread_groups__GFX11 = 0x00000005, - ge_se_reused_es_indices__GFX11 = 0x00000006, - ge_se_sclk_ngg_vld__GFX11 = 0x00000007, - ge_se_sclk_te11_vld__GFX11 = 0x00000008, - ge_se_spi_esvert_eov__GFX11 = 0x00000009, - ge_se_spi_esvert_stalled__GFX11 = 0x0000000a, - ge_se_spi_esvert_starved_busy__GFX11 = 0x0000000b, - ge_se_spi_esvert_valid__GFX11 = 0x0000000c, - ge_se_spi_gsprim_cont__GFX11 = 0x0000000d, - ge_se_spi_gsprim_eov__GFX11 = 0x0000000e, - ge_se_spi_gsprim_stalled__GFX11 = 0x0000000f, - ge_se_spi_gsprim_starved_busy__GFX11 = 0x00000010, - ge_se_spi_gsprim_valid__GFX11 = 0x00000011, - ge_se_spi_gssubgrp_is_event__GFX11 = 0x00000012, - ge_se_spi_gssubgrp_send__GFX11 = 0x00000013, - ge_se_spi_hsvert_eov__GFX11 = 0x00000014, - ge_se_spi_hsvert_stalled__GFX11 = 0x00000015, - ge_se_spi_hsvert_starved_busy__GFX11 = 0x00000016, - ge_se_spi_hsvert_valid__GFX11 = 0x00000017, - ge_se_spi_hsgrp_is_event__GFX11 = 0x00000018, - ge_se_spi_hsgrp_send__GFX11 = 0x00000019, - ge_se_spi_lsvert_eov__GFX11 = 0x0000001a, - ge_se_spi_lsvert_stalled__GFX11 = 0x0000001b, - ge_se_spi_lsvert_starved_busy__GFX11 = 0x0000001c, - ge_se_spi_lsvert_valid__GFX11 = 0x0000001d, - ge_se_spi_hsvert_fifo_full_stall__GFX11 = 0x0000001e, - ge_se_spi_tgrp_fifo_stall__GFX11 = 0x0000001f, - ge_spi_hsgrp_spi_stall__GFX11 = 0x00000020, - ge_se_spi_gssubgrp_event_window_active__GFX11 = 0x00000021, - ge_se_hs_input_stall__GFX11 = 0x00000022, - ge_se_sending_vert_or_prim__GFX11 = 0x00000023, - ge_se_sclk_input_vld__GFX11 = 0x00000024, - ge_spi_lswave_fifo_full_stall__GFX11 = 0x00000025, - ge_spi_hswave_fifo_full_stall__GFX11 = 0x00000026, - ge_hs_tif_stall__GFX11 = 0x00000027, - ge_csb_spi_bp__GFX11 = 0x00000028, - 
ge_ngg_starving_for_wave_id__GFX11 = 0x00000029, - ge_pa0_csb_eop__GFX11 = 0x0000002a, - ge_ngg_starved_idle__GFX11 = 0x0000002b, - ge_gsprim_send__GFX11 = 0x0000002c, - ge_esvert_send__GFX11 = 0x0000002d, - ge_ngg_starved_after_work__GFX11 = 0x0000002e, - ge_ngg_subgrp_fifo_stall__GFX11 = 0x0000002f, - ge_ngg_ord_id_req_stall__GFX11 = 0x00000030, - ge_ngg_indx_bus_stall__GFX11 = 0x00000031, - ge_hs_stall_tfmm_fifo_full__GFX11 = 0x00000032, - ge_gs_issue_rtr_stalled__GFX11 = 0x00000033, - ge_gsprim_stalled_esvert__GFX11 = 0x00000034, - ge_gsthread_stalled__GFX11 = 0x00000035, - ge_ngg_attr_grp_alloc__GFX11 = 0x00000036, - ge_ngg_attr_discard_alloc__GFX11 = 0x00000037, - ge_ngg_pc_space_not_avail__GFX11 = 0x00000038, - ge_ngg_agm_req_stall__GFX11 = 0x00000039, - ge_ngg_spi_esvert_partial_eov__GFX11 = 0x0000003a, - ge_ngg_spi_gsprim_partial_eov__GFX11 = 0x0000003b, - ge_spi_gsgrp_valid__GFX11 = 0x0000003c, - ge_ngg_attr_grp_latency__GFX11 = 0x0000003d, - ge_ngg_reuse_prim_limit_hit__GFX11 = 0x0000003e, - ge_ngg_reuse_vert_limit_hit__GFX11 = 0x0000003f, - ge_te11_con_stall__GFX11 = 0x00000040, - ge_te11_compactor_starved__GFX11 = 0x00000041, - ge_ngg_stall_tess_off_tess_on__GFX11 = 0x00000042, - ge_ngg_stall_tess_on_tess_off__GFX11 = 0x00000043, - ge_merged_lses_vert_stalled__GFX11 = 0x00000044, - ge_merged_hsgs_vert_stalled__GFX11 = 0x00000045, - ge_merged_hsgs_grp_stalled__GFX11 = 0x00000046, - ge_merge_lses_fifo_blocked__GFX11 = 0x00000047, - ge_merge_hsgs_fifo_blocked__GFX11 = 0x00000048, - ge_merge_lses_vert_switch__GFX11 = 0x00000049, - ge_merge_hsgs_vert_switch__GFX11 = 0x0000004a, - ge_merge_hsgs_grp_switch__GFX11 = 0x0000004b, - ge_merge_gsgrp_rdy_pending_verts__GFX11 = 0x0000004c, - ge_merge_hsgrp_rdy_pending_verts__GFX11 = 0x0000004d, - ge_se_ds_cache_hits__GFX11 = 0x0000004e, - ge_se_api_vs_verts__GFX11 = 0x0000004f, - ge_se_api_ds_verts__GFX11 = 0x00000050, - ge_se_combined_busy__GFX11 = 0x00000051, - ge_spi_lsvert_send__GFX11 = 0x00000052, - 
ge_spi_hsvert_send__GFX11 = 0x00000053, - ge_ngg_attr_grp_wasted__GFX11 = 0x00000054, - ge_spi_gssubgrp_stalled__GFX11 = 0x00000055, - ge_ngg_attr_null_dealloc__GFX11 = 0x00000056, - ge_ngg_busy_base__GFX11 = 0x00000057, -#endif -} GE2_SE_PERFCOUNT_SELECT; - -constexpr unsigned int MaxGe2SePerfcountSelectGfx103 = ge_hs_stall_tfmm_fifo_full__GFX103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGe2SePerfcountSelectGfx11 = ge_ngg_busy_base__GFX11; -#endif - -typedef enum GE_PERFCOUNT_SELECT { - ge_assembler_busy__GFX101 = 0x00000000, - ge_assembler_stalled__GFX101 = 0x00000001, - ge_cm_reading_stalled__GFX101 = 0x00000002, - ge_cm_stalled_by_gog__GFX101 = 0x00000003, - ge_cm_stalled_by_gsfetch_done__GFX101 = 0x00000004, - ge_dma_busy__GFX101 = 0x00000005, - ge_dma_lat_bin_0__GFX101 = 0x00000006, - ge_dma_lat_bin_1__GFX101 = 0x00000007, - ge_dma_lat_bin_2__GFX101 = 0x00000008, - ge_dma_lat_bin_3__GFX101 = 0x00000009, - ge_dma_lat_bin_4__GFX101 = 0x0000000a, - ge_dma_lat_bin_5__GFX101 = 0x0000000b, - ge_dma_lat_bin_6__GFX101 = 0x0000000c, - ge_dma_lat_bin_7__GFX101 = 0x0000000d, - ge_dma_return_cl0__GFX101 = 0x0000000e, - ge_dma_return_cl1__GFX101 = 0x0000000f, - ge_dma_utcl1_consecutive_retry_event__GFX101 = 0x00000010, - ge_dma_utcl1_request_event__GFX101 = 0x00000011, - ge_dma_utcl1_retry_event__GFX101 = 0x00000012, - ge_dma_utcl1_stall_event__GFX101 = 0x00000013, - ge_dma_utcl1_stall_utcl2_event__GFX101 = 0x00000014, - ge_dma_utcl1_translation_hit_event__GFX101 = 0x00000015, - ge_dma_utcl1_translation_miss_event__GFX101 = 0x00000016, - ge_ds_cache_hits__GFX101 = 0x00000017, - ge_ds_prims__GFX101 = 0x00000018, - ge_es_done__GFX101 = 0x00000019, - ge_es_done_latency__GFX101 = 0x0000001a, - ge_es_flush__GFX101 = 0x0000001b, - ge_es_ring_high_water_mark__GFX101 = 0x0000001c, - ge_es_thread_groups__GFX101 = 0x0000001d, - ge_esthread_stalled_es_rb_full__GFX101 = 0x0000001e, - ge_esthread_stalled_spi_bp__GFX101 = 
0x0000001f, - ge_esvert_stalled_gs_event__GFX101 = 0x00000020, - ge_esvert_stalled_gs_tbl__GFX101 = 0x00000021, - ge_esvert_stalled_gsprim__GFX101 = 0x00000022, - ge_assembler_dma_starved__GFX101 = 0x00000023, - ge_gog_busy__GFX101 = 0x00000024, - ge_gog_out_indx_stalled__GFX101 = 0x00000025, - ge_gog_out_prim_stalled__GFX101 = 0x00000026, - ge_gog_vs_tbl_stalled__GFX101 = 0x00000027, - ge_gs_cache_hits__GFX101 = 0x00000028, - ge_gs_counters_avail_stalled__GFX101 = 0x00000029, - ge_gs_done__GFX101 = 0x0000002a, - ge_gs_done_latency__GFX101 = 0x0000002b, - ge_gs_issue_rtr_stalled__GFX101 = 0x0000002c, - ge_gs_rb_space_avail_stalled__GFX101 = 0x0000002d, - ge_gs_ring_high_water_mark__GFX101 = 0x0000002e, - ge_gsprim_stalled_es_tbl__GFX101 = 0x0000002f, - ge_gsprim_stalled_esvert__GFX101 = 0x00000030, - ge_gsprim_stalled_gs_event__GFX101 = 0x00000031, - ge_gsprim_stalled_gs_tbl__GFX101 = 0x00000032, - ge_gsthread_stalled__GFX101 = 0x00000033, - ge_hs_done__GFX101 = 0x00000034, - ge_hs_done_latency__GFX101 = 0x00000035, - ge_hs_done_se0__GFX101 = 0x00000036, - ge_hs_done_se1__GFX101 = 0x00000037, - ge_hs_done_se2_reserved__GFX101 = 0x00000038, - ge_hs_done_se3_reserved__GFX101 = 0x00000039, - ge_hs_tfm_stall__GFX101 = 0x0000003a, - ge_hs_tgs_active_high_water_mark__GFX101 = 0x0000003b, - ge_hs_thread_groups__GFX101 = 0x0000003c, - ge_inside_tf_bin_0__GFX101 = 0x0000003d, - ge_inside_tf_bin_1__GFX101 = 0x0000003e, - ge_inside_tf_bin_2__GFX101 = 0x0000003f, - ge_inside_tf_bin_3__GFX101 = 0x00000040, - ge_inside_tf_bin_4__GFX101 = 0x00000041, - ge_inside_tf_bin_5__GFX101 = 0x00000042, - ge_inside_tf_bin_6__GFX101 = 0x00000043, - ge_inside_tf_bin_7__GFX101 = 0x00000044, - ge_inside_tf_bin_8__GFX101 = 0x00000045, - ge_ls_done__GFX101 = 0x00000046, - ge_ls_done_latency__GFX101 = 0x00000047, - ge_null_patch__GFX101 = 0x00000048, - ge_se0pa0_clipp_eop__GFX101 = 0x00000049, - ge_se0pa0_clipp_eopg__GFX101 = 0x0000004a, - ge_se0pa0_clipp_is_event__GFX101 = 0x0000004b, - 
ge_se0pa0_clipp_new_vtx_vect__GFX101 = 0x0000004c, - ge_se0pa0_clipp_null_prim__GFX101 = 0x0000004d, - ge_se0pa0_clipp_send__GFX101 = 0x0000004e, - ge_se0pa0_clipp_send_not_event__GFX101 = 0x0000004f, - ge_se0pa0_clipp_stalled__GFX101 = 0x00000050, - ge_se0pa0_clipp_starved_busy__GFX101 = 0x00000051, - ge_se0pa0_clipp_starved_after_work__GFX101 = 0x00000052, - ge_se0pa0_clipp_valid_prim__GFX101 = 0x00000053, - ge_se0pa0_clips_send__GFX101 = 0x00000054, - ge_se0pa0_clips_stalled__GFX101 = 0x00000055, - ge_se0pa0_clipv_send__GFX101 = 0x00000056, - ge_se0pa0_clipv_stalled__GFX101 = 0x00000057, - ge_se0pa1_clipp_eop__GFX101 = 0x00000058, - ge_se0pa1_clipp_eopg__GFX101 = 0x00000059, - ge_se0pa1_clipp_is_event__GFX101 = 0x0000005a, - ge_se0pa1_clipp_new_vtx_vect__GFX101 = 0x0000005b, - ge_se0pa1_clipp_null_prim__GFX101 = 0x0000005c, - ge_se0pa1_clipp_send__GFX101 = 0x0000005d, - ge_se0pa1_clipp_send_not_event__GFX101 = 0x0000005e, - ge_se0pa1_clipp_stalled__GFX101 = 0x0000005f, - ge_se0pa1_clipp_starved_busy__GFX101 = 0x00000060, - ge_se0pa1_clipp_starved_after_work__GFX101 = 0x00000061, - ge_se0pa1_clipp_valid_prim__GFX101 = 0x00000062, - ge_se0pa1_clips_send__GFX101 = 0x00000063, - ge_se0pa1_clips_stalled__GFX101 = 0x00000064, - ge_se0pa1_clipv_send__GFX101 = 0x00000065, - ge_se0pa1_clipv_stalled__GFX101 = 0x00000066, - ge_se1pa0_clipp_eop__GFX101 = 0x00000067, - ge_se1pa0_clipp_eopg__GFX101 = 0x00000068, - ge_se1pa0_clipp_is_event__GFX101 = 0x00000069, - ge_se1pa0_clipp_new_vtx_vect__GFX101 = 0x0000006a, - ge_se1pa0_clipp_null_prim__GFX101 = 0x0000006b, - ge_se1pa0_clipp_send__GFX101 = 0x0000006c, - ge_se1pa0_clipp_send_not_event__GFX101 = 0x0000006d, - ge_se1pa0_clipp_stalled__GFX101 = 0x0000006e, - ge_se1pa0_clipp_starved_busy__GFX101 = 0x0000006f, - ge_se1pa0_clipp_starved_after_work__GFX101 = 0x00000070, - ge_se1pa0_clipp_valid_prim__GFX101 = 0x00000071, - ge_se1pa0_clips_send__GFX101 = 0x00000072, - ge_se1pa0_clips_stalled__GFX101 = 0x00000073, - 
ge_se1pa0_clipv_send__GFX101 = 0x00000074, - ge_se1pa0_clipv_stalled__GFX101 = 0x00000075, - ge_se1pa1_clipp_eop__GFX101 = 0x00000076, - ge_se1pa1_clipp_eopg__GFX101 = 0x00000077, - ge_se1pa1_clipp_is_event__GFX101 = 0x00000078, - ge_se1pa1_clipp_new_vtx_vect__GFX101 = 0x00000079, - ge_se1pa1_clipp_null_prim__GFX101 = 0x0000007a, - ge_se1pa1_clipp_send__GFX101 = 0x0000007b, - ge_se1pa1_clipp_send_not_event__GFX101 = 0x0000007c, - ge_se1pa1_clipp_stalled__GFX101 = 0x0000007d, - ge_se1pa1_clipp_starved_busy__GFX101 = 0x0000007e, - ge_se1pa1_clipp_starved_after_work__GFX101 = 0x0000007f, - ge_se1pa1_clipp_valid_prim__GFX101 = 0x00000080, - ge_se1pa1_clips_send__GFX101 = 0x00000081, - ge_se1pa1_clips_stalled__GFX101 = 0x00000082, - ge_se1pa1_clipv_send__GFX101 = 0x00000083, - ge_se1pa1_clipv_stalled__GFX101 = 0x00000084, - ge_se2pa0_clipp_eop__GFX101 = 0x00000085, - ge_se2pa0_clipp_eopg__GFX101 = 0x00000086, - ge_se2pa0_clipp_is_event__GFX101 = 0x00000087, - ge_se2pa0_clipp_new_vtx_vect__GFX101 = 0x00000088, - ge_se2pa0_clipp_null_prim__GFX101 = 0x00000089, - ge_se2pa0_clipp_send__GFX101 = 0x0000008a, - ge_se2pa0_clipp_send_not_event__GFX101 = 0x0000008b, - ge_se2pa0_clipp_stalled__GFX101 = 0x0000008c, - ge_se2pa0_clipp_starved_busy__GFX101 = 0x0000008d, - ge_se2pa0_clipp_starved_after_work__GFX101 = 0x0000008e, - ge_se2pa0_clipp_valid_prim__GFX101 = 0x0000008f, - ge_se2pa0_clips_send__GFX101 = 0x00000090, - ge_se2pa0_clips_stalled__GFX101 = 0x00000091, - ge_se2pa0_clipv_send__GFX101 = 0x00000092, - ge_se2pa0_clipv_stalled__GFX101 = 0x00000093, - ge_se2pa1_clipp_eop__GFX101 = 0x00000094, - ge_se2pa1_clipp_eopg__GFX101 = 0x00000095, - ge_se2pa1_clipp_is_event__GFX101 = 0x00000096, - ge_se2pa1_clipp_new_vtx_vect__GFX101 = 0x00000097, - ge_se2pa1_clipp_null_prim__GFX101 = 0x00000098, - ge_se2pa1_clipp_send__GFX101 = 0x00000099, - ge_se2pa1_clipp_send_not_event__GFX101 = 0x0000009a, - ge_se2pa1_clipp_stalled__GFX101 = 0x0000009b, - ge_se2pa1_clipp_starved_busy__GFX101 = 
0x0000009c, - ge_se2pa1_clipp_starved_after_work__GFX101 = 0x0000009d, - ge_se2pa1_clipp_valid_prim__GFX101 = 0x0000009e, - ge_se2pa1_clips_send__GFX101 = 0x0000009f, - ge_se2pa1_clips_stalled__GFX101 = 0x000000a0, - ge_se2pa1_clipv_send__GFX101 = 0x000000a1, - ge_se2pa1_clipv_stalled__GFX101 = 0x000000a2, - ge_se3pa0_clipp_eop__GFX101 = 0x000000a3, - ge_se3pa0_clipp_eopg__GFX101 = 0x000000a4, - ge_se3pa0_clipp_is_event__GFX101 = 0x000000a5, - ge_se3pa0_clipp_new_vtx_vect__GFX101 = 0x000000a6, - ge_se3pa0_clipp_null_prim__GFX101 = 0x000000a7, - ge_se3pa0_clipp_send__GFX101 = 0x000000a8, - ge_se3pa0_clipp_send_not_event__GFX101 = 0x000000a9, - ge_se3pa0_clipp_stalled__GFX101 = 0x000000aa, - ge_se3pa0_clipp_starved_busy__GFX101 = 0x000000ab, - ge_se3pa0_clipp_starved_after_work__GFX101 = 0x000000ac, - ge_se3pa0_clipp_valid_prim__GFX101 = 0x000000ad, - ge_se3pa0_clips_send__GFX101 = 0x000000ae, - ge_se3pa0_clips_stalled__GFX101 = 0x000000af, - ge_se3pa0_clipv_send__GFX101 = 0x000000b0, - ge_se3pa0_clipv_stalled__GFX101 = 0x000000b1, - ge_se3pa1_clipp_eop__GFX101 = 0x000000b2, - ge_se3pa1_clipp_eopg__GFX101 = 0x000000b3, - ge_se3pa1_clipp_is_event__GFX101 = 0x000000b4, - ge_se3pa1_clipp_new_vtx_vect__GFX101 = 0x000000b5, - ge_se3pa1_clipp_null_prim__GFX101 = 0x000000b6, - ge_se3pa1_clipp_send__GFX101 = 0x000000b7, - ge_se3pa1_clipp_send_not_event__GFX101 = 0x000000b8, - ge_se3pa1_clipp_stalled__GFX101 = 0x000000b9, - ge_se3pa1_clipp_starved_busy__GFX101 = 0x000000ba, - ge_se3pa1_clipp_starved_after_work__GFX101 = 0x000000bb, - ge_se3pa1_clipp_valid_prim__GFX101 = 0x000000bc, - ge_se3pa1_clips_send__GFX101 = 0x000000bd, - ge_se3pa1_clips_stalled__GFX101 = 0x000000be, - ge_se3pa1_clipv_send__GFX101 = 0x000000bf, - ge_se3pa1_clipv_stalled__GFX101 = 0x000000c0, - ge_rbiu_di_fifo_stalled__GFX101 = 0x000000c1, - ge_rbiu_di_fifo_starved__GFX101 = 0x000000c2, - ge_rbiu_dr_fifo_stalled__GFX101 = 0x000000c3, - ge_rbiu_dr_fifo_starved__GFX101 = 0x000000c4, - 
ge_reused_es_indices__GFX101 = 0x000000c5, - ge_reused_vs_indices__GFX101 = 0x000000c6, - ge_sclk_core_vld__GFX101 = 0x000000c7, - ge_sclk_gs_vld__GFX101 = 0x000000c8, - ge_sclk_input_vld__GFX101 = 0x000000c9, - ge_sclk_leg_gs_arb_vld__GFX101 = 0x000000ca, - ge_sclk_ngg_vld__GFX101 = 0x000000cb, - ge_sclk_reg_vld__GFX101 = 0x000000cc, - ge_sclk_te11_vld__GFX101 = 0x000000cd, - ge_sclk_vr_vld__GFX101 = 0x000000ce, - ge_sclk_wd_te11_vld__GFX101 = 0x000000cf, - ge_spi_esvert_eov__GFX101 = 0x000000d0, - ge_spi_esvert_stalled__GFX101 = 0x000000d1, - ge_spi_esvert_starved_busy__GFX101 = 0x000000d2, - ge_spi_esvert_valid__GFX101 = 0x000000d3, - ge_spi_eswave_is_event__GFX101 = 0x000000d4, - ge_spi_eswave_send__GFX101 = 0x000000d5, - ge_se0spi_gsprim_cont__GFX101 = 0x000000d6, - ge_se1spi_gsprim_cont__GFX101 = 0x000000d7, - ge_se2spi_gsprim_cont__GFX101 = 0x000000d8, - ge_se3spi_gsprim_cont__GFX101 = 0x000000d9, - ge_spi_gsprim_eov__GFX101 = 0x000000da, - ge_spi_gsprim_stalled__GFX101 = 0x000000db, - ge_spi_gsprim_starved_busy__GFX101 = 0x000000dc, - ge_spi_gsprim_valid__GFX101 = 0x000000dd, - ge_spi_gssubgrp_is_event__GFX101 = 0x000000de, - ge_spi_gssubgrp_send__GFX101 = 0x000000df, - ge_spi_hsvert_eov__GFX101 = 0x000000e0, - ge_spi_hsvert_stalled__GFX101 = 0x000000e1, - ge_spi_hsvert_starved_busy__GFX101 = 0x000000e2, - ge_spi_hsvert_valid__GFX101 = 0x000000e3, - ge_spi_hswave_is_event__GFX101 = 0x000000e4, - ge_spi_hswave_send__GFX101 = 0x000000e5, - ge_spi_lsvert_eov__GFX101 = 0x000000e6, - ge_spi_lsvert_stalled__GFX101 = 0x000000e7, - ge_spi_lsvert_starved_busy__GFX101 = 0x000000e8, - ge_spi_lsvert_valid__GFX101 = 0x000000e9, - ge_spi_lswave_is_event__GFX101 = 0x000000ea, - ge_spi_lswave_send__GFX101 = 0x000000eb, - ge_spi_vsvert_eov__GFX101 = 0x000000ec, - ge_spi_vsvert_send__GFX101 = 0x000000ed, - ge_spi_vsvert_stalled__GFX101 = 0x000000ee, - ge_spi_vsvert_starved_busy__GFX101 = 0x000000ef, - ge_spi_vswave_is_event__GFX101 = 0x000000f0, - ge_spi_vswave_send__GFX101 
= 0x000000f1, - ge_starved_on_hs_done__GFX101 = 0x000000f2, - ge_stat_busy__GFX101 = 0x000000f3, - ge_stat_combined_busy__GFX101 = 0x000000f4, - ge_stat_no_dma_busy__GFX101 = 0x000000f5, - ge_strmout_stalled__GFX101 = 0x000000f6, - ge_te11_busy__GFX101 = 0x000000f7, - ge_te11_starved__GFX101 = 0x000000f8, - ge_tfreq_lat_bin_0__GFX101 = 0x000000f9, - ge_tfreq_lat_bin_1__GFX101 = 0x000000fa, - ge_tfreq_lat_bin_2__GFX101 = 0x000000fb, - ge_tfreq_lat_bin_3__GFX101 = 0x000000fc, - ge_tfreq_lat_bin_4__GFX101 = 0x000000fd, - ge_tfreq_lat_bin_5__GFX101 = 0x000000fe, - ge_tfreq_lat_bin_6__GFX101 = 0x000000ff, - ge_tfreq_lat_bin_7__GFX101 = 0x00000100, - ge_tfreq_utcl1_consecutive_retry_event__GFX101 = 0x00000101, - ge_tfreq_utcl1_request_event__GFX101 = 0x00000102, - ge_tfreq_utcl1_retry_event__GFX101 = 0x00000103, - ge_tfreq_utcl1_stall_event__GFX101 = 0x00000104, - ge_tfreq_utcl1_stall_utcl2_event__GFX101 = 0x00000105, - ge_tfreq_utcl1_translation_hit_event__GFX101 = 0x00000106, - ge_tfreq_utcl1_translation_miss_event__GFX101 = 0x00000107, - ge_ls_thread_group__GFX101 = 0x00000108, - ge_rcm_indicies_hit__GFX101 = 0x00000109, - ge_vs_cache_hits__GFX101 = 0x0000010a, - ge_vs_done__GFX101 = 0x0000010b, - ge_vs_pc_stall__GFX101 = 0x0000010c, - ge_vs_table_high_water_mark__GFX101 = 0x0000010d, - ge_vs_thread_groups__GFX101 = 0x0000010e, - ge_vsvert_api_send__GFX101 = 0x0000010f, - ge_vsvert_ds_send__GFX101 = 0x00000110, - ge_wait_for_es_done_stalled__GFX101 = 0x00000111, - ge_waveid_stalled__GFX101 = 0x00000112, - ge_spi_vsvert_valid__GFX101 = 0x00000113, - pc_feorder_fifo_full__GFX101 = 0x00000114, - pc_ge_manager_busy__GFX101 = 0x00000115, - pc_req_stall_se0__GFX101 = 0x00000116, - pc_req_stall_se1__GFX101 = 0x00000117, - pc_req_stall_se2__GFX101 = 0x00000118, - pc_req_stall_se3__GFX101 = 0x00000119, - ge_pipe0_to_pipe1__GFX101 = 0x0000011a, - ge_pipe1_to_pipe0__GFX101 = 0x0000011b, - ge_dma_return_size_cl0__GFX101 = 0x0000011c, - ge_dma_return_size_cl1__GFX101 = 0x0000011d, 
- ge_spi_gssubgrp_event_window_active__GFX101 = 0x0000011e, - ge_bypass_fifo_full__GFX101 = 0x0000011f, - ge_hs_input_stall0__GFX101 = 0x00000120, - ge_hs_input_stall1__GFX101 = 0x00000121, - ge_pc_space_zero__GFX101 = 0x00000122, - vgt_se0pa0_clipv_starved_busy__GFX101 = 0x00000123, - vgt_se0pa1_clipv_starved_busy__GFX101 = 0x00000124, - vgt_se1pa0_clipv_starved_busy__GFX101 = 0x00000125, - vgt_se1pa1_clipv_starved_busy__GFX101 = 0x00000126, - vgt_se2pa0_clipv_starved_busy__GFX101 = 0x00000127, - vgt_se2pa1_clipv_starved_busy__GFX101 = 0x00000128, - vgt_se3pa0_clipv_starved_busy__GFX101 = 0x00000129, - vgt_se3pa1_clipv_starved_busy__GFX101 = 0x0000012a, - vgt_se0pa0_clipv_firstvert__GFX101 = 0x0000012b, - vgt_se0pa1_clipv_firstvert__GFX101 = 0x0000012c, - vgt_se1pa0_clipv_firstvert__GFX101 = 0x0000012d, - vgt_se1pa1_clipv_firstvert__GFX101 = 0x0000012e, - vgt_se2pa0_clipv_firstvert__GFX101 = 0x0000012f, - vgt_se2pa1_clipv_firstvert__GFX101 = 0x00000130, - vgt_se3pa0_clipv_firstvert__GFX101 = 0x00000131, - vgt_se3pa1_clipv_firstvert__GFX101 = 0x00000132, - vgt_se0pa0_clips_starved_busy__GFX101 = 0x00000133, - vgt_se0pa1_clips_starved_busy__GFX101 = 0x00000134, - vgt_se1pa0_clips_starved_busy__GFX101 = 0x00000135, - vgt_se1pa1_clips_starved_busy__GFX101 = 0x00000136, - vgt_se2pa0_clips_starved_busy__GFX101 = 0x00000137, - vgt_se2pa1_clips_starved_busy__GFX101 = 0x00000138, - vgt_se3pa0_clips_starved_busy__GFX101 = 0x00000139, - vgt_se3pa1_clips_starved_busy__GFX101 = 0x0000013a, -} GE_PERFCOUNT_SELECT; - -constexpr unsigned int MaxGePerfcountSelectGfx101 = vgt_se3pa1_clips_starved_busy__GFX101; - -typedef enum GL0V_CACHE_POLICIES { - GL0V_CACHE_POLICY_MISS_LRU = 0x00000000, - GL0V_CACHE_POLICY_MISS_EVICT = 0x00000001, - GL0V_CACHE_POLICY_HIT_LRU = 0x00000002, - GL0V_CACHE_POLICY_HIT_EVICT = 0x00000003, -} GL0V_CACHE_POLICIES; - -typedef enum GL1A_PERF_SEL { - GL1A_PERF_SEL_BUSY = 0x00000000, - GL1A_PERF_SEL_STALL_GL1C0 = 0x00000001, - GL1A_PERF_SEL_STALL_GL1C1 = 
0x00000002, - GL1A_PERF_SEL_STALL_GL1C2 = 0x00000003, - GL1A_PERF_SEL_STALL_GL1C3 = 0x00000004, - GL1A_PERF_SEL_REQUEST_GL1C0__GFX101 = 0x00000005, - GL1A_PERF_SEL_REQUEST_GL1C1__GFX101 = 0x00000006, - GL1A_PERF_SEL_REQUEST_GL1C2__GFX101 = 0x00000007, - GL1A_PERF_SEL_REQUEST_GL1C3__GFX101 = 0x00000008, - GL1A_PERF_SEL_MEM_32B_WDS_GL1C0__GFX101 = 0x00000009, - GL1A_PERF_SEL_MEM_32B_WDS_GL1C1__GFX101 = 0x0000000a, - GL1A_PERF_SEL_MEM_32B_WDS_GL1C2__GFX101 = 0x0000000b, - GL1A_PERF_SEL_MEM_32B_WDS_GL1C3__GFX101 = 0x0000000c, - GL1A_PERF_SEL_IO_32B_WDS_GL1C0__GFX101 = 0x0000000d, - GL1A_PERF_SEL_IO_32B_WDS_GL1C1__GFX101 = 0x0000000e, - GL1A_PERF_SEL_IO_32B_WDS_GL1C2__GFX101 = 0x0000000f, - GL1A_PERF_SEL_IO_32B_WDS_GL1C3__GFX101 = 0x00000010, - GL1A_PERF_SEL_MEM_BURST_COUNT_GL1C0__GFX101 = 0x00000011, - GL1A_PERF_SEL_MEM_BURST_COUNT_GL1C1__GFX101 = 0x00000012, - GL1A_PERF_SEL_MEM_BURST_COUNT_GL1C2__GFX101 = 0x00000013, - GL1A_PERF_SEL_MEM_BURST_COUNT_GL1C3__GFX101 = 0x00000014, - GL1A_PERF_SEL_IO_BURST_COUNT_GL1C0__GFX101 = 0x00000015, - GL1A_PERF_SEL_IO_BURST_COUNT_GL1C1__GFX101 = 0x00000016, - GL1A_PERF_SEL_IO_BURST_COUNT_GL1C2__GFX101 = 0x00000017, - GL1A_PERF_SEL_IO_BURST_COUNT_GL1C3__GFX101 = 0x00000018, - GL1A_PERF_SEL_ARB_REQUESTS__GFX101 = 0x00000019, - GL1A_PERF_SEL_REQ_ARB_LEVEL_GL1C0__GFX101 = 0x0000001a, - GL1A_PERF_SEL_REQ_ARB_LEVEL_GL1C1__GFX101 = 0x0000001b, - GL1A_PERF_SEL_REQ_ARB_LEVEL_GL1C2__GFX101 = 0x0000001c, - GL1A_PERF_SEL_REQ_ARB_LEVEL_GL1C3__GFX101 = 0x0000001d, - GL1A_PERF_SEL_REQ_INFLIGHT_LEVEL__GFX101 = 0x0000001e, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C0__GFX101 = 0x0000001f, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C1__GFX101 = 0x00000020, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C2__GFX101 = 0x00000021, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C3__GFX101 = 0x00000022, - GL1A_PERF_SEL_CYCLE__GFX101 = 0x00000023, - GL1A_PERF_SEL_REQUEST_GL1C0__GFX103PLUSEXCLUSIVE = 0x00000005, - GL1A_PERF_SEL_REQUEST_GL1C1__GFX103PLUSEXCLUSIVE = 0x00000006, - 
GL1A_PERF_SEL_REQUEST_GL1C2__GFX103PLUSEXCLUSIVE = 0x00000007, - GL1A_PERF_SEL_REQUEST_GL1C3__GFX103PLUSEXCLUSIVE = 0x00000008, - GL1A_PERF_SEL_WDS_32B_GL1C0__GFX103PLUSEXCLUSIVE = 0x00000009, - GL1A_PERF_SEL_WDS_32B_GL1C1__GFX103PLUSEXCLUSIVE = 0x0000000a, - GL1A_PERF_SEL_WDS_32B_GL1C2__GFX103PLUSEXCLUSIVE = 0x0000000b, - GL1A_PERF_SEL_WDS_32B_GL1C3__GFX103PLUSEXCLUSIVE = 0x0000000c, - GL1A_PERF_SEL_BURST_COUNT_GL1C0__GFX103PLUSEXCLUSIVE = 0x0000000d, - GL1A_PERF_SEL_BURST_COUNT_GL1C1__GFX103PLUSEXCLUSIVE = 0x0000000e, - GL1A_PERF_SEL_BURST_COUNT_GL1C2__GFX103PLUSEXCLUSIVE = 0x0000000f, - GL1A_PERF_SEL_BURST_COUNT_GL1C3__GFX103PLUSEXCLUSIVE = 0x00000010, - GL1A_PERF_SEL_ARB_REQUESTS__GFX103PLUSEXCLUSIVE = 0x00000011, - GL1A_PERF_SEL_REQ_INFLIGHT_LEVEL__GFX103PLUSEXCLUSIVE = 0x00000012, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C0__GFX103PLUSEXCLUSIVE = 0x00000013, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C1__GFX103PLUSEXCLUSIVE = 0x00000014, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C2__GFX103PLUSEXCLUSIVE = 0x00000015, - GL1A_PERF_SEL_STALL_RET_CONFLICT_GL1C3__GFX103PLUSEXCLUSIVE = 0x00000016, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE = 0x00000017, -} GL1A_PERF_SEL; - -constexpr unsigned int MaxGl1aPerfSelGfx101 = GL1A_PERF_SEL_CYCLE__GFX101; -constexpr unsigned int MaxGl1aPerfSelGfx103PlusExclusive = GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE; - -typedef enum GL1C_PERF_SEL { - GL1C_PERF_SEL_CYCLE = 0x00000000, - GL1C_PERF_SEL_BUSY = 0x00000001, - GL1C_PERF_SEL_STARVE = 0x00000002, - GL1C_PERF_SEL_ARB_RET_LEVEL = 0x00000003, - GL1C_PERF_SEL_GL2_REQ_READ = 0x00000004, - GL1C_PERF_SEL_GL2_REQ_READ_128B = 0x00000005, - GL1C_PERF_SEL_GL2_REQ_READ_32B = 0x00000006, - GL1C_PERF_SEL_GL2_REQ_READ_64B = 0x00000007, - GL1C_PERF_SEL_GL2_REQ_READ_LATENCY = 0x00000008, - GL1C_PERF_SEL_GL2_REQ_WRITE = 0x00000009, - GL1C_PERF_SEL_GL2_REQ_WRITE_32B = 0x0000000a, - GL1C_PERF_SEL_GL2_REQ_WRITE_64B = 0x0000000b, - GL1C_PERF_SEL_GL2_REQ_WRITE_LATENCY = 0x0000000c, - 
GL1C_PERF_SEL_GL2_REQ_PREFETCH = 0x0000000d, - GL1C_PERF_SEL_REQ = 0x0000000e, - GL1C_PERF_SEL_REQ_ATOMIC_WITH_RET = 0x0000000f, - GL1C_PERF_SEL_REQ_ATOMIC_WITHOUT_RET = 0x00000010, - GL1C_PERF_SEL_REQ_SHADER_INV = 0x00000011, - GL1C_PERF_SEL_REQ_MISS = 0x00000012, - GL1C_PERF_SEL_REQ_NOP_ACK = 0x00000013, - GL1C_PERF_SEL_REQ_NOP_RTN0 = 0x00000014, - GL1C_PERF_SEL_REQ_READ = 0x00000015, - GL1C_PERF_SEL_REQ_READ_128B = 0x00000016, - GL1C_PERF_SEL_REQ_READ_32B = 0x00000017, - GL1C_PERF_SEL_REQ_READ_64B = 0x00000018, - GL1C_PERF_SEL_REQ_READ_POLICY_HIT_EVICT = 0x00000019, - GL1C_PERF_SEL_REQ_READ_POLICY_HIT_LRU = 0x0000001a, - GL1C_PERF_SEL_REQ_READ_POLICY_MISS_EVICT = 0x0000001b, - GL1C_PERF_SEL_REQ_WRITE = 0x0000001c, - GL1C_PERF_SEL_REQ_WRITE_32B = 0x0000001d, - GL1C_PERF_SEL_REQ_WRITE_64B = 0x0000001e, - GL1C_PERF_SEL_STALL_GL2_GL1 = 0x0000001f, - GL1C_PERF_SEL_STALL_LFIFO_FULL = 0x00000020, - GL1C_PERF_SEL_STALL_NO_AVAILABLE_ACK_ALLOC = 0x00000021, - GL1C_PERF_SEL_STALL_NOTHING_REPLACEABLE = 0x00000022, - GL1C_PERF_SEL_STALL_GCR_INV = 0x00000023, - GL1C_PERF_SEL_REQ_CLIENT0__GFX101 = 0x00000024, - GL1C_PERF_SEL_REQ_CLIENT1__GFX101 = 0x00000025, - GL1C_PERF_SEL_REQ_CLIENT2__GFX101 = 0x00000026, - GL1C_PERF_SEL_REQ_CLIENT3__GFX101 = 0x00000027, - GL1C_PERF_SEL_REQ_CLIENT4__GFX101 = 0x00000028, - GL1C_PERF_SEL_REQ_CLIENT5__GFX101 = 0x00000029, - GL1C_PERF_SEL_REQ_CLIENT6__GFX101 = 0x0000002a, - GL1C_PERF_SEL_REQ_CLIENT7__GFX101 = 0x0000002b, - GL1C_PERF_SEL_REQ_CLIENT8__GFX101 = 0x0000002c, - GL1C_PERF_SEL_REQ_CLIENT9__GFX101 = 0x0000002d, - GL1C_PERF_SEL_REQ_CLIENT10__GFX101 = 0x0000002e, - GL1C_PERF_SEL_REQ_CLIENT11__GFX101 = 0x0000002f, - GL1C_PERF_SEL_REQ_CLIENT12__GFX101 = 0x00000030, - GL1C_PERF_SEL_REQ_CLIENT13__GFX101 = 0x00000031, - GL1C_PERF_SEL_REQ_CLIENT14__GFX101 = 0x00000032, - GL1C_PERF_SEL_REQ_CLIENT15__GFX101 = 0x00000033, - GL1C_PERF_SEL_REQ_CLIENT16__GFX101 = 0x00000034, - GL1C_PERF_SEL_REQ_CLIENT17__GFX101 = 0x00000035, - 
GL1C_PERF_SEL_REQ_CLIENT18__GFX101 = 0x00000036, - GL1C_PERF_SEL_REQ_CLIENT19__GFX101 = 0x00000037, - GL1C_PERF_SEL_REQ_CLIENT20__GFX101 = 0x00000038, - GL1C_PERF_SEL_REQ_CLIENT21__GFX101 = 0x00000039, - GL1C_PERF_SEL_REQ_CLIENT22__GFX101 = 0x0000003a, - GL1C_PERF_SEL_REQ_CLIENT23__GFX101 = 0x0000003b, - GL1C_PERF_SEL_REQ_CLIENT24__GFX101 = 0x0000003c, - GL1C_PERF_SEL_REQ_CLIENT25__GFX101 = 0x0000003d, - GL1C_PERF_SEL_REQ_CLIENT26__GFX101 = 0x0000003e, - GL1C_PERF_SEL_REQ_CLIENT27__GFX101 = 0x0000003f, - GL1C_PERF_SEL_UTCL0_LFIFO_FULL__GFX103DERIVATIVE = 0x00000045, - GL1C_PERF_SEL_UTCL0_STALL_INFLIGHT_MAX__GFX103DERIVATIVE = 0x00000046, - GL1C_PERF_SEL_UTCL0_STALL_LFIFO_NOT_RES__GFX103DERIVATIVE = 0x00000047, - GL1C_PERF_SEL_UTCL0_STALL_LRU_INFLIGHT__GFX103DERIVATIVE = 0x00000048, - GL1C_PERF_SEL_UTCL0_STALL_MISSFIFO_FULL__GFX103DERIVATIVE = 0x00000049, - GL1C_PERF_SEL_UTCL0_STALL_MULTI_MISS__GFX103DERIVATIVE = 0x0000004a, - GL1C_PERF_SEL_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX103DERIVATIVE = 0x0000004b, - GL1C_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX103DERIVATIVE = 0x0000004c, - GL1C_PERF_SEL_CLIENT_UTCL0_INFLIGHT__GFX103DERIVATIVE = 0x0000004d, - GL1C_PERF_SEL_UTCL0_UTCL1_INFLIGHT__GFX103DERIVATIVE = 0x0000004e, - GL1C_PERF_SEL_UTCL0_INTERNAL_RETRY_REQ__GFX103DERIVATIVE = 0x0000004f, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_RETRY_FAULT__GFX103DERIVATIVE = 0x00000050, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_PRT_FAULT__GFX103DERIVATIVE = 0x00000051, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE = 0x00000052, - GL1C_PERF_SEL_STALL_VM__GFX103PLUSEXCLUSIVE = 0x00000024, - GL1C_PERF_SEL_REQ_CLIENT0__GFX103PLUSEXCLUSIVE = 0x00000025, - GL1C_PERF_SEL_REQ_CLIENT1__GFX103PLUSEXCLUSIVE = 0x00000026, - GL1C_PERF_SEL_REQ_CLIENT2__GFX103PLUSEXCLUSIVE = 0x00000027, - GL1C_PERF_SEL_REQ_CLIENT3__GFX103PLUSEXCLUSIVE = 0x00000028, - GL1C_PERF_SEL_REQ_CLIENT4__GFX103PLUSEXCLUSIVE = 0x00000029, - GL1C_PERF_SEL_REQ_CLIENT5__GFX103PLUSEXCLUSIVE = 0x0000002a, - 
GL1C_PERF_SEL_REQ_CLIENT6__GFX103PLUSEXCLUSIVE = 0x0000002b, - GL1C_PERF_SEL_REQ_CLIENT7__GFX103PLUSEXCLUSIVE = 0x0000002c, - GL1C_PERF_SEL_REQ_CLIENT8__GFX103PLUSEXCLUSIVE = 0x0000002d, - GL1C_PERF_SEL_REQ_CLIENT9__GFX103PLUSEXCLUSIVE = 0x0000002e, - GL1C_PERF_SEL_REQ_CLIENT10__GFX103PLUSEXCLUSIVE = 0x0000002f, - GL1C_PERF_SEL_REQ_CLIENT11__GFX103PLUSEXCLUSIVE = 0x00000030, - GL1C_PERF_SEL_REQ_CLIENT12__GFX103PLUSEXCLUSIVE = 0x00000031, - GL1C_PERF_SEL_REQ_CLIENT13__GFX103PLUSEXCLUSIVE = 0x00000032, - GL1C_PERF_SEL_REQ_CLIENT14__GFX103PLUSEXCLUSIVE = 0x00000033, - GL1C_PERF_SEL_REQ_CLIENT15__GFX103PLUSEXCLUSIVE = 0x00000034, - GL1C_PERF_SEL_REQ_CLIENT16__GFX103PLUSEXCLUSIVE = 0x00000035, - GL1C_PERF_SEL_REQ_CLIENT17__GFX103PLUSEXCLUSIVE = 0x00000036, - GL1C_PERF_SEL_REQ_CLIENT18__GFX103PLUSEXCLUSIVE = 0x00000037, - GL1C_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE = 0x00000038, - GL1C_PERF_SEL_REQ_CLIENT20__GFX103PLUSEXCLUSIVE = 0x00000039, - GL1C_PERF_SEL_REQ_CLIENT21__GFX103PLUSEXCLUSIVE = 0x0000003a, - GL1C_PERF_SEL_REQ_CLIENT22__GFX103PLUSEXCLUSIVE = 0x0000003b, - GL1C_PERF_SEL_REQ_CLIENT23__GFX103PLUSEXCLUSIVE = 0x0000003c, - GL1C_PERF_SEL_REQ_CLIENT24__GFX103PLUSEXCLUSIVE = 0x0000003d, - GL1C_PERF_SEL_REQ_CLIENT25__GFX103PLUSEXCLUSIVE = 0x0000003e, - GL1C_PERF_SEL_REQ_CLIENT26__GFX103PLUSEXCLUSIVE = 0x0000003f, - GL1C_PERF_SEL_REQ_CLIENT27__GFX103PLUSEXCLUSIVE = 0x00000040, - GL1C_PERF_SEL_UTCL0_REQUEST__GFX103PLUSEXCLUSIVE = 0x00000041, - GL1C_PERF_SEL_UTCL0_TRANSLATION_HIT__GFX103PLUSEXCLUSIVE = 0x00000042, - GL1C_PERF_SEL_UTCL0_TRANSLATION_MISS__GFX103PLUSEXCLUSIVE = 0x00000043, - GL1C_PERF_SEL_UTCL0_PERMISSION_MISS__GFX103PLUSEXCLUSIVE = 0x00000044, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GL1C_PERF_SEL_UTCL0_MISS_UNDER_MISS__GFX11 = 0x00000045, - GL1C_PERF_SEL_UTCL0_LFIFO_FULL__GFX11 = 0x00000046, - GL1C_PERF_SEL_UTCL0_STALL_INFLIGHT_MAX__GFX11 = 0x00000047, - GL1C_PERF_SEL_UTCL0_STALL_LFIFO_NOT_RES__GFX11 = 
0x00000048, - GL1C_PERF_SEL_UTCL0_STALL_LRU_INFLIGHT__GFX11 = 0x00000049, - GL1C_PERF_SEL_UTCL0_STALL_MISSFIFO_FULL__GFX11 = 0x0000004a, - GL1C_PERF_SEL_UTCL0_STALL_MULTI_MISS__GFX11 = 0x0000004b, - GL1C_PERF_SEL_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX11 = 0x0000004c, - GL1C_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX11 = 0x0000004d, - GL1C_PERF_SEL_CLIENT_UTCL0_INFLIGHT__GFX11 = 0x0000004e, - GL1C_PERF_SEL_UTCL0_UTCL1_INFLIGHT__GFX11 = 0x0000004f, - GL1C_PERF_SEL_UTCL0_INTERNAL_RETRY_REQ__GFX11 = 0x00000050, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_RETRY_FAULT__GFX11 = 0x00000051, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_PRT_FAULT__GFX11 = 0x00000052, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11 = 0x00000053, -#endif -} GL1C_PERF_SEL; - -constexpr unsigned int MaxGl1cPerfSelGfx101 = GL1C_PERF_SEL_REQ_CLIENT27__GFX101; -constexpr unsigned int MaxGl1cPerfSelGfx103Derivative = GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGl1cPerfSelGfx11 = GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11; -#endif - -typedef enum GL1_CACHE_POLICIES { - GL1_CACHE_POLICY_MISS_LRU = 0x00000000, - GL1_CACHE_POLICY_MISS_EVICT = 0x00000001, - GL1_CACHE_POLICY_HIT_LRU = 0x00000002, - GL1_CACHE_POLICY_HIT_EVICT = 0x00000003, -} GL1_CACHE_POLICIES; - -typedef enum GL1_CACHE_STORE_POLICIES { - GL1_CACHE_STORE_POLICY_BYPASS = 0x00000000, -} GL1_CACHE_STORE_POLICIES; - -typedef enum GL2A_PERF_SEL { - GL2A_PERF_SEL_NONE = 0x00000000, - GL2A_PERF_SEL_CYCLE = 0x00000001, - GL2A_PERF_SEL_BUSY = 0x00000002, - GL2A_PERF_SEL_REQ_GL2C0 = 0x00000003, - GL2A_PERF_SEL_REQ_GL2C1 = 0x00000004, - GL2A_PERF_SEL_REQ_GL2C2 = 0x00000005, - GL2A_PERF_SEL_REQ_GL2C3 = 0x00000006, - GL2A_PERF_SEL_REQ_GL2C4 = 0x00000007, - GL2A_PERF_SEL_REQ_GL2C5 = 0x00000008, - GL2A_PERF_SEL_REQ_GL2C6 = 0x00000009, - GL2A_PERF_SEL_REQ_GL2C7 = 0x0000000a, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C0 = 0x0000000b, - 
GL2A_PERF_SEL_REQ_HI_PRIO_GL2C1 = 0x0000000c, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C2 = 0x0000000d, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C3 = 0x0000000e, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C4 = 0x0000000f, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C5 = 0x00000010, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C6 = 0x00000011, - GL2A_PERF_SEL_REQ_HI_PRIO_GL2C7 = 0x00000012, - GL2A_PERF_SEL_REQ_BURST_GL2C0 = 0x00000013, - GL2A_PERF_SEL_REQ_BURST_GL2C1 = 0x00000014, - GL2A_PERF_SEL_REQ_BURST_GL2C2 = 0x00000015, - GL2A_PERF_SEL_REQ_BURST_GL2C3 = 0x00000016, - GL2A_PERF_SEL_REQ_BURST_GL2C4 = 0x00000017, - GL2A_PERF_SEL_REQ_BURST_GL2C5 = 0x00000018, - GL2A_PERF_SEL_REQ_BURST_GL2C6 = 0x00000019, - GL2A_PERF_SEL_REQ_BURST_GL2C7 = 0x0000001a, - GL2A_PERF_SEL_REQ_STALL_GL2C0 = 0x0000001b, - GL2A_PERF_SEL_REQ_STALL_GL2C1 = 0x0000001c, - GL2A_PERF_SEL_REQ_STALL_GL2C2 = 0x0000001d, - GL2A_PERF_SEL_REQ_STALL_GL2C3 = 0x0000001e, - GL2A_PERF_SEL_REQ_STALL_GL2C4 = 0x0000001f, - GL2A_PERF_SEL_REQ_STALL_GL2C5 = 0x00000020, - GL2A_PERF_SEL_REQ_STALL_GL2C6 = 0x00000021, - GL2A_PERF_SEL_REQ_STALL_GL2C7 = 0x00000022, - GL2A_PERF_SEL_RTN_STALL_GL2C0 = 0x00000023, - GL2A_PERF_SEL_RTN_STALL_GL2C1 = 0x00000024, - GL2A_PERF_SEL_RTN_STALL_GL2C2 = 0x00000025, - GL2A_PERF_SEL_RTN_STALL_GL2C3 = 0x00000026, - GL2A_PERF_SEL_RTN_STALL_GL2C4 = 0x00000027, - GL2A_PERF_SEL_RTN_STALL_GL2C5 = 0x00000028, - GL2A_PERF_SEL_RTN_STALL_GL2C6 = 0x00000029, - GL2A_PERF_SEL_RTN_STALL_GL2C7 = 0x0000002a, - GL2A_PERF_SEL_RTN_CLIENT0 = 0x0000002b, - GL2A_PERF_SEL_RTN_CLIENT1 = 0x0000002c, - GL2A_PERF_SEL_RTN_CLIENT2 = 0x0000002d, - GL2A_PERF_SEL_RTN_CLIENT3 = 0x0000002e, - GL2A_PERF_SEL_RTN_CLIENT4 = 0x0000002f, - GL2A_PERF_SEL_RTN_CLIENT5 = 0x00000030, - GL2A_PERF_SEL_RTN_CLIENT6 = 0x00000031, - GL2A_PERF_SEL_RTN_CLIENT7 = 0x00000032, - GL2A_PERF_SEL_RTN_CLIENT8 = 0x00000033, - GL2A_PERF_SEL_RTN_CLIENT9 = 0x00000034, - GL2A_PERF_SEL_RTN_CLIENT10 = 0x00000035, - GL2A_PERF_SEL_RTN_CLIENT11 = 0x00000036, - GL2A_PERF_SEL_RTN_CLIENT12 = 0x00000037, - 
GL2A_PERF_SEL_RTN_CLIENT13 = 0x00000038, - GL2A_PERF_SEL_RTN_CLIENT14 = 0x00000039, - GL2A_PERF_SEL_RTN_CLIENT15 = 0x0000003a, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT0 = 0x0000003b, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT1 = 0x0000003c, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT2 = 0x0000003d, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT3 = 0x0000003e, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT4 = 0x0000003f, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT5 = 0x00000040, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT6 = 0x00000041, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT7 = 0x00000042, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT8 = 0x00000043, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT9 = 0x00000044, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT10 = 0x00000045, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT11 = 0x00000046, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT12 = 0x00000047, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT13 = 0x00000048, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT14 = 0x00000049, - GL2A_PERF_SEL_RTN_ARB_COLLISION_CLIENT15 = 0x0000004a, - GL2A_PERF_SEL_REQ_BURST_CLIENT0 = 0x0000004b, - GL2A_PERF_SEL_REQ_BURST_CLIENT1 = 0x0000004c, - GL2A_PERF_SEL_REQ_BURST_CLIENT2 = 0x0000004d, - GL2A_PERF_SEL_REQ_BURST_CLIENT3 = 0x0000004e, - GL2A_PERF_SEL_REQ_BURST_CLIENT4 = 0x0000004f, - GL2A_PERF_SEL_REQ_BURST_CLIENT5 = 0x00000050, - GL2A_PERF_SEL_REQ_BURST_CLIENT6 = 0x00000051, - GL2A_PERF_SEL_REQ_BURST_CLIENT7 = 0x00000052, - GL2A_PERF_SEL_REQ_BURST_CLIENT8 = 0x00000053, - GL2A_PERF_SEL_REQ_BURST_CLIENT9 = 0x00000054, - GL2A_PERF_SEL_REQ_BURST_CLIENT10 = 0x00000055, - GL2A_PERF_SEL_REQ_BURST_CLIENT11 = 0x00000056, - GL2A_PERF_SEL_REQ_BURST_CLIENT12 = 0x00000057, - GL2A_PERF_SEL_REQ_BURST_CLIENT13 = 0x00000058, - GL2A_PERF_SEL_REQ_BURST_CLIENT14 = 0x00000059, - GL2A_PERF_SEL_REQ_BURST_CLIENT15 = 0x0000005a, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT0__GFX104PLUS = 0x0000005b, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT1__GFX104PLUS 
= 0x0000005c, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT2__GFX104PLUS = 0x0000005d, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT3__GFX104PLUS = 0x0000005e, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT4__GFX104PLUS = 0x0000005f, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT5__GFX104PLUS = 0x00000060, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT6__GFX104PLUS = 0x00000061, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT7__GFX104PLUS = 0x00000062, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT8__GFX104PLUS = 0x00000063, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT9__GFX104PLUS = 0x00000064, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT10__GFX104PLUS = 0x00000065, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT11__GFX104PLUS = 0x00000067, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT12__GFX104PLUS = 0x00000068, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT13__GFX104PLUS = 0x00000069, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT14__GFX104PLUS = 0x0000006a, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS = 0x0000006b, -#endif -} GL2A_PERF_SEL; - -constexpr unsigned int MaxGl2aPerfSelGfx10Core = GL2A_PERF_SEL_REQ_BURST_CLIENT15; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGl2aPerfSelGfx104Plus = GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS; -#endif - -typedef enum GL2C_PERF_SEL { - GL2C_PERF_SEL_NONE = 0x00000000, - GL2C_PERF_SEL_CYCLE = 0x00000001, - GL2C_PERF_SEL_BUSY = 0x00000002, - GL2C_PERF_SEL_REQ = 0x00000003, - GL2C_PERF_SEL_VOL_REQ = 0x00000004, - GL2C_PERF_SEL_HIGH_PRIORITY_REQ = 0x00000005, - GL2C_PERF_SEL_READ = 0x00000006, - GL2C_PERF_SEL_WRITE = 0x00000007, - GL2C_PERF_SEL_ATOMIC = 0x00000008, - GL2C_PERF_SEL_NOP_ACK = 0x00000009, - GL2C_PERF_SEL_NOP_RTN0 = 0x0000000a, - GL2C_PERF_SEL_PROBE = 0x0000000b, - GL2C_PERF_SEL_PROBE_ALL = 0x0000000c, - GL2C_PERF_SEL_INTERNAL_PROBE = 0x0000000d, - GL2C_PERF_SEL_COMPRESSED_READ_REQ = 0x0000000e, - GL2C_PERF_SEL_METADATA_READ_REQ = 0x0000000f, - GL2C_PERF_SEL_CLIENT0_REQ = 0x00000010, - GL2C_PERF_SEL_CLIENT1_REQ = 
0x00000011, - GL2C_PERF_SEL_CLIENT2_REQ = 0x00000012, - GL2C_PERF_SEL_CLIENT3_REQ = 0x00000013, - GL2C_PERF_SEL_CLIENT4_REQ = 0x00000014, - GL2C_PERF_SEL_CLIENT5_REQ = 0x00000015, - GL2C_PERF_SEL_CLIENT6_REQ = 0x00000016, - GL2C_PERF_SEL_CLIENT7_REQ = 0x00000017, - GL2C_PERF_SEL_C_RW_S_REQ__GFX101 = 0x00000018, - GL2C_PERF_SEL_C_RW_US_REQ__GFX101 = 0x00000019, - GL2C_PERF_SEL_C_RO_S_REQ__GFX101 = 0x0000001a, - GL2C_PERF_SEL_C_RO_US_REQ__GFX101 = 0x0000001b, - GL2C_PERF_SEL_UC_REQ__GFX101 = 0x0000001c, - GL2C_PERF_SEL_LRU_REQ__GFX101 = 0x0000001d, - GL2C_PERF_SEL_STREAM_REQ__GFX101 = 0x0000001e, - GL2C_PERF_SEL_BYPASS_REQ__GFX101 = 0x0000001f, - GL2C_PERF_SEL_NOA_REQ__GFX101 = 0x00000020, - GL2C_PERF_SEL_SHARED_REQ__GFX101 = 0x00000021, - GL2C_PERF_SEL_HIT__GFX101 = 0x00000022, - GL2C_PERF_SEL_MISS__GFX101 = 0x00000023, - GL2C_PERF_SEL_FULL_HIT__GFX101 = 0x00000024, - GL2C_PERF_SEL_PARTIAL_32B_HIT__GFX101 = 0x00000025, - GL2C_PERF_SEL_PARTIAL_64B_HIT__GFX101 = 0x00000026, - GL2C_PERF_SEL_PARTIAL_96B_HIT__GFX101 = 0x00000027, - GL2C_PERF_SEL_DEWRITE_ALLOCATE_HIT__GFX101 = 0x00000028, - GL2C_PERF_SEL_FULLY_WRITTEN_HIT__GFX101 = 0x00000029, - GL2C_PERF_SEL_UNCACHED_WRITE__GFX101 = 0x0000002a, - GL2C_PERF_SEL_WRITEBACK__GFX101 = 0x0000002b, - GL2C_PERF_SEL_NORMAL_WRITEBACK__GFX101 = 0x0000002c, - GL2C_PERF_SEL_EVICT__GFX101 = 0x0000002d, - GL2C_PERF_SEL_NORMAL_EVICT__GFX101 = 0x0000002e, - GL2C_PERF_SEL_PROBE_EVICT__GFX101 = 0x0000002f, - GL2C_PERF_SEL_REQ_TO_MISS_QUEUE__GFX101 = 0x00000030, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT0__GFX101 = 0x00000031, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT1__GFX101 = 0x00000032, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT2__GFX101 = 0x00000033, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT3__GFX101 = 0x00000034, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT4__GFX101 = 0x00000035, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT5__GFX101 = 0x00000036, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT6__GFX101 = 0x00000037, - 
GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT7__GFX101 = 0x00000038, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT8__GFX101 = 0x00000039, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT9__GFX101 = 0x0000003a, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT10__GFX101 = 0x0000003b, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT11__GFX101 = 0x0000003c, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT12__GFX101 = 0x0000003d, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT13__GFX101 = 0x0000003e, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT14__GFX101 = 0x0000003f, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT15__GFX101 = 0x00000040, - GL2C_PERF_SEL_READ_32_REQ__GFX101 = 0x00000041, - GL2C_PERF_SEL_READ_64_REQ__GFX101 = 0x00000042, - GL2C_PERF_SEL_READ_128_REQ__GFX101 = 0x00000043, - GL2C_PERF_SEL_WRITE_32_REQ__GFX101 = 0x00000044, - GL2C_PERF_SEL_WRITE_64_REQ__GFX101 = 0x00000045, - GL2C_PERF_SEL_COMPRESSED_READ_0_REQ__GFX101 = 0x00000046, - GL2C_PERF_SEL_COMPRESSED_READ_32_REQ__GFX101 = 0x00000047, - GL2C_PERF_SEL_COMPRESSED_READ_64_REQ__GFX101 = 0x00000048, - GL2C_PERF_SEL_COMPRESSED_READ_96_REQ__GFX101 = 0x00000049, - GL2C_PERF_SEL_COMPRESSED_READ_128_REQ__GFX101 = 0x0000004a, - GL2C_PERF_SEL_MC_WRREQ__GFX101 = 0x0000004b, - GL2C_PERF_SEL_EA_WRREQ_64B__GFX101 = 0x0000004c, - GL2C_PERF_SEL_EA_WRREQ_PROBE_COMMAND__GFX101 = 0x0000004d, - GL2C_PERF_SEL_EA_WR_UNCACHED_32B__GFX101 = 0x0000004e, - GL2C_PERF_SEL_MC_WRREQ_STALL__GFX101 = 0x0000004f, - GL2C_PERF_SEL_EA_WRREQ_IO_CREDIT_STALL__GFX101 = 0x00000050, - GL2C_PERF_SEL_EA_WRREQ_GMI_CREDIT_STALL__GFX101 = 0x00000051, - GL2C_PERF_SEL_EA_WRREQ_DRAM_CREDIT_STALL__GFX101 = 0x00000052, - GL2C_PERF_SEL_TOO_MANY_EA_WRREQS_STALL__GFX101 = 0x00000053, - GL2C_PERF_SEL_MC_WRREQ_LEVEL__GFX101 = 0x00000054, - GL2C_PERF_SEL_EA_ATOMIC__GFX101 = 0x00000055, - GL2C_PERF_SEL_EA_ATOMIC_LEVEL__GFX101 = 0x00000056, - GL2C_PERF_SEL_MC_RDREQ__GFX101 = 0x00000057, - GL2C_PERF_SEL_EA_RDREQ_SPLIT__GFX101 = 0x00000058, - GL2C_PERF_SEL_EA_RDREQ_32B__GFX101 = 0x00000059, - GL2C_PERF_SEL_EA_RDREQ_64B__GFX101 = 
0x0000005a, - GL2C_PERF_SEL_EA_RDREQ_96B__GFX101 = 0x0000005b, - GL2C_PERF_SEL_EA_RDREQ_128B__GFX101 = 0x0000005c, - GL2C_PERF_SEL_EA_RD_UNCACHED_32B__GFX101 = 0x0000005d, - GL2C_PERF_SEL_EA_RD_MDC_32B__GFX101 = 0x0000005e, - GL2C_PERF_SEL_EA_RD_COMPRESSED_32B__GFX101 = 0x0000005f, - GL2C_PERF_SEL_EA_RDREQ_IO_CREDIT_STALL__GFX101 = 0x00000060, - GL2C_PERF_SEL_EA_RDREQ_GMI_CREDIT_STALL__GFX101 = 0x00000061, - GL2C_PERF_SEL_EA_RDREQ_DRAM_CREDIT_STALL__GFX101 = 0x00000062, - GL2C_PERF_SEL_MC_RDREQ_LEVEL__GFX101 = 0x00000063, - GL2C_PERF_SEL_EA_RDREQ_DRAM__GFX101 = 0x00000064, - GL2C_PERF_SEL_EA_WRREQ_DRAM__GFX101 = 0x00000065, - GL2C_PERF_SEL_EA_RDREQ_DRAM_32B__GFX101 = 0x00000066, - GL2C_PERF_SEL_EA_WRREQ_DRAM_32B__GFX101 = 0x00000067, - GL2C_PERF_SEL_ONION_READ__GFX101 = 0x00000068, - GL2C_PERF_SEL_ONION_WRITE__GFX101 = 0x00000069, - GL2C_PERF_SEL_IO_READ__GFX101 = 0x0000006a, - GL2C_PERF_SEL_IO_WRITE__GFX101 = 0x0000006b, - GL2C_PERF_SEL_GARLIC_READ__GFX101 = 0x0000006c, - GL2C_PERF_SEL_GARLIC_WRITE__GFX101 = 0x0000006d, - GL2C_PERF_SEL_EA_OUTSTANDING__GFX101 = 0x0000006e, - GL2C_PERF_SEL_LATENCY_FIFO_FULL__GFX101 = 0x0000006f, - GL2C_PERF_SEL_SRC_FIFO_FULL__GFX101 = 0x00000070, - GL2C_PERF_SEL_TAG_STALL__GFX101 = 0x00000071, - GL2C_PERF_SEL_TAG_WRITEBACK_FIFO_FULL_STALL__GFX101 = 0x00000072, - GL2C_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL__GFX101 = 0x00000073, - GL2C_PERF_SEL_TAG_UNCACHED_WRITE_ATOMIC_FIFO_FULL_STALL__GFX101 = 0x00000074, - GL2C_PERF_SEL_TAG_NO_UNCACHED_WRITE_ATOMIC_ENTRIES_STALL__GFX101 = 0x00000075, - GL2C_PERF_SEL_TAG_PROBE_STALL__GFX101 = 0x00000076, - GL2C_PERF_SEL_TAG_PROBE_FILTER_STALL__GFX101 = 0x00000077, - GL2C_PERF_SEL_TAG_PROBE_FIFO_FULL_STALL__GFX101 = 0x00000078, - GL2C_PERF_SEL_TAG_READ_DST_STALL__GFX101 = 0x00000079, - GL2C_PERF_SEL_READ_RETURN_TIMEOUT__GFX101 = 0x0000007a, - GL2C_PERF_SEL_WRITEBACK_READ_TIMEOUT__GFX101 = 0x0000007b, - GL2C_PERF_SEL_READ_RETURN_FULL_BUBBLE__GFX101 = 0x0000007c, - GL2C_PERF_SEL_BUBBLE__GFX101 = 
0x0000007d, - GL2C_PERF_SEL_IB_REQ__GFX101 = 0x0000007e, - GL2C_PERF_SEL_IB_STALL__GFX101 = 0x0000007f, - GL2C_PERF_SEL_IB_TAG_STALL__GFX101 = 0x00000080, - GL2C_PERF_SEL_IB_CM_STALL__GFX101 = 0x00000081, - GL2C_PERF_SEL_RETURN_ACK__GFX101 = 0x00000082, - GL2C_PERF_SEL_RETURN_DATA__GFX101 = 0x00000083, - GL2C_PERF_SEL_EA_RDRET_NACK__GFX101 = 0x00000084, - GL2C_PERF_SEL_EA_WRRET_NACK__GFX101 = 0x00000085, - GL2C_PERF_SEL_GL2A_LEVEL__GFX101 = 0x00000086, - GL2C_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION__GFX101 = 0x00000087, - GL2C_PERF_SEL_PROBE_FILTER_DISABLED__GFX101 = 0x00000088, - GL2C_PERF_SEL_ALL_TC_OP_WB_OR_INV_START__GFX101 = 0x00000089, - GL2C_PERF_SEL_ALL_TC_OP_WB_OR_INV_VOL_START__GFX101 = 0x0000008a, - GL2C_PERF_SEL_GCR_INV__GFX101 = 0x0000008b, - GL2C_PERF_SEL_GCR_WB__GFX101 = 0x0000008c, - GL2C_PERF_SEL_GCR_DISCARD__GFX101 = 0x0000008d, - GL2C_PERF_SEL_GCR_RANGE__GFX101 = 0x0000008e, - GL2C_PERF_SEL_GCR_ALL__GFX101 = 0x0000008f, - GL2C_PERF_SEL_GCR_VOL__GFX101 = 0x00000090, - GL2C_PERF_SEL_GCR_UNSHARED__GFX101 = 0x00000091, - GL2C_PERF_SEL_GCR_MDC_INV__GFX101 = 0x00000092, - GL2C_PERF_SEL_GCR_GL2_INV_ALL__GFX101 = 0x00000093, - GL2C_PERF_SEL_GCR_GL2_WB_ALL__GFX101 = 0x00000094, - GL2C_PERF_SEL_GCR_MDC_INV_ALL__GFX101 = 0x00000095, - GL2C_PERF_SEL_GCR_GL2_INV_RANGE__GFX101 = 0x00000096, - GL2C_PERF_SEL_GCR_GL2_WB_RANGE__GFX101 = 0x00000097, - GL2C_PERF_SEL_GCR_GL2_WB_INV_RANGE__GFX101 = 0x00000098, - GL2C_PERF_SEL_GCR_MDC_INV_RANGE__GFX101 = 0x00000099, - GL2C_PERF_SEL_ALL_GCR_INV_EVICT__GFX101 = 0x0000009a, - GL2C_PERF_SEL_ALL_GCR_INV_VOL_EVICT__GFX101 = 0x0000009b, - GL2C_PERF_SEL_ALL_GCR_WB_OR_INV_CYCLE__GFX101 = 0x0000009c, - GL2C_PERF_SEL_ALL_GCR_WB_OR_INV_VOL_CYCLE__GFX101 = 0x0000009d, - GL2C_PERF_SEL_ALL_GCR_WB_WRITEBACK__GFX101 = 0x0000009e, - GL2C_PERF_SEL_GCR_INVL2_VOL_CYCLE__GFX101 = 0x0000009f, - GL2C_PERF_SEL_GCR_INVL2_VOL_EVICT__GFX101 = 0x000000a0, - GL2C_PERF_SEL_GCR_INVL2_VOL_START__GFX101 = 0x000000a1, - 
GL2C_PERF_SEL_GCR_WBL2_VOL_CYCLE__GFX101 = 0x000000a2, - GL2C_PERF_SEL_GCR_WBL2_VOL_EVICT__GFX101 = 0x000000a3, - GL2C_PERF_SEL_GCR_WBL2_VOL_START__GFX101 = 0x000000a4, - GL2C_PERF_SEL_GCR_WBINVL2_CYCLE__GFX101 = 0x000000a5, - GL2C_PERF_SEL_GCR_WBINVL2_EVICT__GFX101 = 0x000000a6, - GL2C_PERF_SEL_GCR_WBINVL2_START__GFX101 = 0x000000a7, - GL2C_PERF_SEL_MDC_INV_METADATA__GFX101 = 0x000000a8, - GL2C_PERF_SEL_MDC_REQ__GFX101 = 0x000000a9, - GL2C_PERF_SEL_MDC_LEVEL__GFX101 = 0x000000aa, - GL2C_PERF_SEL_MDC_TAG_HIT__GFX101 = 0x000000ab, - GL2C_PERF_SEL_MDC_SECTOR_HIT__GFX101 = 0x000000ac, - GL2C_PERF_SEL_MDC_SECTOR_MISS__GFX101 = 0x000000ad, - GL2C_PERF_SEL_MDC_TAG_STALL__GFX101 = 0x000000ae, - GL2C_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL__GFX101 = 0x000000af, - GL2C_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL__GFX101 = 0x000000b0, - GL2C_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL__GFX101 = 0x000000b1, - GL2C_PERF_SEL_CM_CHANNEL0_REQ__GFX101 = 0x000000b2, - GL2C_PERF_SEL_CM_CHANNEL1_REQ__GFX101 = 0x000000b3, - GL2C_PERF_SEL_CM_CHANNEL2_REQ__GFX101 = 0x000000b4, - GL2C_PERF_SEL_CM_CHANNEL3_REQ__GFX101 = 0x000000b5, - GL2C_PERF_SEL_CM_CHANNEL4_REQ__GFX101 = 0x000000b6, - GL2C_PERF_SEL_CM_CHANNEL5_REQ__GFX101 = 0x000000b7, - GL2C_PERF_SEL_CM_CHANNEL6_REQ__GFX101 = 0x000000b8, - GL2C_PERF_SEL_CM_CHANNEL7_REQ__GFX101 = 0x000000b9, - GL2C_PERF_SEL_CM_CHANNEL8_REQ__GFX101 = 0x000000ba, - GL2C_PERF_SEL_CM_CHANNEL9_REQ__GFX101 = 0x000000bb, - GL2C_PERF_SEL_CM_CHANNEL10_REQ__GFX101 = 0x000000bc, - GL2C_PERF_SEL_CM_CHANNEL11_REQ__GFX101 = 0x000000bd, - GL2C_PERF_SEL_CM_CHANNEL12_REQ__GFX101 = 0x000000be, - GL2C_PERF_SEL_CM_CHANNEL13_REQ__GFX101 = 0x000000bf, - GL2C_PERF_SEL_CM_CHANNEL14_REQ__GFX101 = 0x000000c0, - GL2C_PERF_SEL_CM_CHANNEL15_REQ__GFX101 = 0x000000c1, - GL2C_PERF_SEL_CM_CHANNEL16_REQ__GFX101 = 0x000000c2, - GL2C_PERF_SEL_CM_CHANNEL17_REQ__GFX101 = 0x000000c3, - GL2C_PERF_SEL_CM_CHANNEL18_REQ__GFX101 = 0x000000c4, - 
GL2C_PERF_SEL_CM_CHANNEL19_REQ__GFX101 = 0x000000c5, - GL2C_PERF_SEL_CM_CHANNEL20_REQ__GFX101 = 0x000000c6, - GL2C_PERF_SEL_CM_CHANNEL21_REQ__GFX101 = 0x000000c7, - GL2C_PERF_SEL_CM_CHANNEL22_REQ__GFX101 = 0x000000c8, - GL2C_PERF_SEL_CM_CHANNEL23_REQ__GFX101 = 0x000000c9, - GL2C_PERF_SEL_CM_CHANNEL24_REQ__GFX101 = 0x000000ca, - GL2C_PERF_SEL_CM_CHANNEL25_REQ__GFX101 = 0x000000cb, - GL2C_PERF_SEL_CM_CHANNEL26_REQ__GFX101 = 0x000000cc, - GL2C_PERF_SEL_CM_CHANNEL27_REQ__GFX101 = 0x000000cd, - GL2C_PERF_SEL_CM_CHANNEL28_REQ__GFX101 = 0x000000ce, - GL2C_PERF_SEL_CM_CHANNEL29_REQ__GFX101 = 0x000000cf, - GL2C_PERF_SEL_CM_CHANNEL30_REQ__GFX101 = 0x000000d0, - GL2C_PERF_SEL_CM_CHANNEL31_REQ__GFX101 = 0x000000d1, - GL2C_PERF_SEL_CM_COMP_ATOMIC_COLOR_REQ__GFX101 = 0x000000d2, - GL2C_PERF_SEL_CM_COMP_ATOMIC_DEPTH16_REQ__GFX101 = 0x000000d3, - GL2C_PERF_SEL_CM_COMP_ATOMIC_DEPTH32_REQ__GFX101 = 0x000000d4, - GL2C_PERF_SEL_CM_COMP_WRITE_COLOR_REQ__GFX101 = 0x000000d5, - GL2C_PERF_SEL_CM_COMP_WRITE_DEPTH16_REQ__GFX101 = 0x000000d6, - GL2C_PERF_SEL_CM_COMP_WRITE_DEPTH32_REQ__GFX101 = 0x000000d7, - GL2C_PERF_SEL_CM_COMP_WRITE_STENCIL_REQ__GFX101 = 0x000000d8, - GL2C_PERF_SEL_CM_COMP_READ_REQ__GFX101 = 0x000000d9, - GL2C_PERF_SEL_CM_READ_BACK_REQ__GFX101 = 0x000000da, - GL2C_PERF_SEL_CM_METADATA_WR_REQ__GFX101 = 0x000000db, - GL2C_PERF_SEL_CM_WR_ACK_REQ__GFX101 = 0x000000dc, - GL2C_PERF_SEL_CM_NO_ACK_REQ__GFX101 = 0x000000dd, - GL2C_PERF_SEL_CM_NOOP_REQ__GFX101 = 0x000000de, - GL2C_PERF_SEL_CM_COMP_COLOR_EN_REQ__GFX101 = 0x000000df, - GL2C_PERF_SEL_CM_COMP_COLOR_DIS_REQ__GFX101 = 0x000000e0, - GL2C_PERF_SEL_CM_COMP_STENCIL_REQ__GFX101 = 0x000000e1, - GL2C_PERF_SEL_CM_COMP_DEPTH16_REQ__GFX101 = 0x000000e2, - GL2C_PERF_SEL_CM_COMP_DEPTH32_REQ__GFX101 = 0x000000e3, - GL2C_PERF_SEL_CM_COLOR_32B_WR_REQ__GFX101 = 0x000000e4, - GL2C_PERF_SEL_CM_COLOR_64B_WR_REQ__GFX101 = 0x000000e5, - GL2C_PERF_SEL_CM_FULL_WRITE_REQ__GFX101 = 0x000000e6, - GL2C_PERF_SEL_CM_RVF_FULL__GFX101 = 0x000000e7, - 
GL2C_PERF_SEL_CM_SDR_FULL__GFX101 = 0x000000e8, - GL2C_PERF_SEL_CM_MERGE_BUF_FULL__GFX101 = 0x000000e9, - GL2C_PERF_SEL_CM_DCC_STALL__GFX101 = 0x000000ea, - GL2C_PERF_SEL_EA_WRREQ_SNOOP__GFX103COREPLUS = 0x00000054, - GL2C_PERF_SEL_EA_WRREQ_64B__GFX103COREPLUS = 0x00000055, - GL2C_PERF_SEL_EA_WRREQ_PROBE_COMMAND__GFX103COREPLUS = 0x00000056, - GL2C_PERF_SEL_EA_WR_UNCACHED_32B__GFX103COREPLUS = 0x00000057, - GL2C_PERF_SEL_EA_WRREQ_IO_CREDIT_STALL__GFX103COREPLUS = 0x00000059, - GL2C_PERF_SEL_EA_WRREQ_GMI_CREDIT_STALL__GFX103COREPLUS = 0x0000005a, - GL2C_PERF_SEL_EA_WRREQ_DRAM_CREDIT_STALL__GFX103COREPLUS = 0x0000005b, - GL2C_PERF_SEL_TOO_MANY_EA_WRREQS_STALL__GFX103COREPLUS = 0x0000005c, - GL2C_PERF_SEL_EA_ATOMIC__GFX103COREPLUS = 0x0000005e, - GL2C_PERF_SEL_EA_ATOMIC_LEVEL__GFX103COREPLUS = 0x0000005f, - GL2C_PERF_SEL_EA_RDREQ_SNOOP__GFX103COREPLUS = 0x00000061, - GL2C_PERF_SEL_EA_RDREQ_SPLIT__GFX103COREPLUS = 0x00000062, - GL2C_PERF_SEL_EA_RDREQ_32B__GFX103COREPLUS = 0x00000063, - GL2C_PERF_SEL_EA_RDREQ_64B__GFX103COREPLUS = 0x00000064, - GL2C_PERF_SEL_EA_RDREQ_96B__GFX103COREPLUS = 0x00000065, - GL2C_PERF_SEL_EA_RDREQ_128B__GFX103COREPLUS = 0x00000066, - GL2C_PERF_SEL_EA_RD_UNCACHED_32B__GFX103COREPLUS = 0x00000067, - GL2C_PERF_SEL_EA_RD_MDC_32B__GFX103COREPLUS = 0x00000068, - GL2C_PERF_SEL_EA_RD_COMPRESSED_32B__GFX103COREPLUS = 0x00000069, - GL2C_PERF_SEL_EA_RDREQ_IO_CREDIT_STALL__GFX103COREPLUS = 0x0000006a, - GL2C_PERF_SEL_EA_RDREQ_GMI_CREDIT_STALL__GFX103COREPLUS = 0x0000006b, - GL2C_PERF_SEL_EA_RDREQ_DRAM_CREDIT_STALL__GFX103COREPLUS = 0x0000006c, - GL2C_PERF_SEL_EA_RDREQ_DRAM__GFX103COREPLUS = 0x0000006e, - GL2C_PERF_SEL_EA_WRREQ_DRAM__GFX103COREPLUS = 0x0000006f, - GL2C_PERF_SEL_EA_RDREQ_DRAM_32B__GFX103COREPLUS = 0x00000070, - GL2C_PERF_SEL_EA_WRREQ_DRAM_32B__GFX103COREPLUS = 0x00000071, - GL2C_PERF_SEL_EA_OUTSTANDING__GFX103COREPLUS = 0x00000078, - GL2C_PERF_SEL_EA_RDRET_NACK__GFX103COREPLUS = 0x0000008e, - GL2C_PERF_SEL_EA_WRRET_NACK__GFX103COREPLUS = 
0x0000008f, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE = 0x000000fd, - GL2C_PERF_SEL_CLIENT8_REQ__GFX103PLUSEXCLUSIVE = 0x00000018, - GL2C_PERF_SEL_CLIENT9_REQ__GFX103PLUSEXCLUSIVE = 0x00000019, - GL2C_PERF_SEL_CLIENT10_REQ__GFX103PLUSEXCLUSIVE = 0x0000001a, - GL2C_PERF_SEL_CLIENT11_REQ__GFX103PLUSEXCLUSIVE = 0x0000001b, - GL2C_PERF_SEL_CLIENT12_REQ__GFX103PLUSEXCLUSIVE = 0x0000001c, - GL2C_PERF_SEL_CLIENT13_REQ__GFX103PLUSEXCLUSIVE = 0x0000001d, - GL2C_PERF_SEL_CLIENT14_REQ__GFX103PLUSEXCLUSIVE = 0x0000001e, - GL2C_PERF_SEL_CLIENT15_REQ__GFX103PLUSEXCLUSIVE = 0x0000001f, - GL2C_PERF_SEL_C_RW_S_REQ__GFX103PLUSEXCLUSIVE = 0x00000020, - GL2C_PERF_SEL_C_RW_US_REQ__GFX103PLUSEXCLUSIVE = 0x00000021, - GL2C_PERF_SEL_C_RO_S_REQ__GFX103PLUSEXCLUSIVE = 0x00000022, - GL2C_PERF_SEL_C_RO_US_REQ__GFX103PLUSEXCLUSIVE = 0x00000023, - GL2C_PERF_SEL_UC_REQ__GFX103PLUSEXCLUSIVE = 0x00000024, - GL2C_PERF_SEL_LRU_REQ__GFX103PLUSEXCLUSIVE = 0x00000025, - GL2C_PERF_SEL_STREAM_REQ__GFX103PLUSEXCLUSIVE = 0x00000026, - GL2C_PERF_SEL_BYPASS_REQ__GFX103PLUSEXCLUSIVE = 0x00000027, - GL2C_PERF_SEL_NOA_REQ__GFX103PLUSEXCLUSIVE = 0x00000028, - GL2C_PERF_SEL_SHARED_REQ__GFX103PLUSEXCLUSIVE = 0x00000029, - GL2C_PERF_SEL_HIT__GFX103PLUSEXCLUSIVE = 0x0000002a, - GL2C_PERF_SEL_MISS__GFX103PLUSEXCLUSIVE = 0x0000002b, - GL2C_PERF_SEL_FULL_HIT__GFX103PLUSEXCLUSIVE = 0x0000002c, - GL2C_PERF_SEL_PARTIAL_32B_HIT__GFX103PLUSEXCLUSIVE = 0x0000002d, - GL2C_PERF_SEL_PARTIAL_64B_HIT__GFX103PLUSEXCLUSIVE = 0x0000002e, - GL2C_PERF_SEL_PARTIAL_96B_HIT__GFX103PLUSEXCLUSIVE = 0x0000002f, - GL2C_PERF_SEL_DEWRITE_ALLOCATE_HIT__GFX103PLUSEXCLUSIVE = 0x00000030, - GL2C_PERF_SEL_FULLY_WRITTEN_HIT__GFX103PLUSEXCLUSIVE = 0x00000031, - GL2C_PERF_SEL_UNCACHED_WRITE__GFX103PLUSEXCLUSIVE = 0x00000032, - GL2C_PERF_SEL_WRITEBACK__GFX103PLUSEXCLUSIVE = 0x00000033, - GL2C_PERF_SEL_NORMAL_WRITEBACK__GFX103PLUSEXCLUSIVE = 0x00000034, - GL2C_PERF_SEL_EVICT__GFX103PLUSEXCLUSIVE = 0x00000035, - 
GL2C_PERF_SEL_NORMAL_EVICT__GFX103PLUSEXCLUSIVE = 0x00000036, - GL2C_PERF_SEL_PROBE_EVICT__GFX103PLUSEXCLUSIVE = 0x00000037, - GL2C_PERF_SEL_REQ_TO_MISS_QUEUE__GFX103PLUSEXCLUSIVE = 0x00000038, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT0__GFX103PLUSEXCLUSIVE = 0x00000039, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT1__GFX103PLUSEXCLUSIVE = 0x0000003a, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT2__GFX103PLUSEXCLUSIVE = 0x0000003b, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT3__GFX103PLUSEXCLUSIVE = 0x0000003c, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT4__GFX103PLUSEXCLUSIVE = 0x0000003d, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT5__GFX103PLUSEXCLUSIVE = 0x0000003e, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT6__GFX103PLUSEXCLUSIVE = 0x0000003f, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT7__GFX103PLUSEXCLUSIVE = 0x00000040, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT8__GFX103PLUSEXCLUSIVE = 0x00000041, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT9__GFX103PLUSEXCLUSIVE = 0x00000042, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT10__GFX103PLUSEXCLUSIVE = 0x00000043, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT11__GFX103PLUSEXCLUSIVE = 0x00000044, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT12__GFX103PLUSEXCLUSIVE = 0x00000045, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT13__GFX103PLUSEXCLUSIVE = 0x00000046, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT14__GFX103PLUSEXCLUSIVE = 0x00000047, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT15__GFX103PLUSEXCLUSIVE = 0x00000048, - GL2C_PERF_SEL_READ_32_REQ__GFX103PLUSEXCLUSIVE = 0x00000049, - GL2C_PERF_SEL_READ_64_REQ__GFX103PLUSEXCLUSIVE = 0x0000004a, - GL2C_PERF_SEL_READ_128_REQ__GFX103PLUSEXCLUSIVE = 0x0000004b, - GL2C_PERF_SEL_WRITE_32_REQ__GFX103PLUSEXCLUSIVE = 0x0000004c, - GL2C_PERF_SEL_WRITE_64_REQ__GFX103PLUSEXCLUSIVE = 0x0000004d, - GL2C_PERF_SEL_COMPRESSED_READ_0_REQ__GFX103PLUSEXCLUSIVE = 0x0000004e, - GL2C_PERF_SEL_COMPRESSED_READ_32_REQ__GFX103PLUSEXCLUSIVE = 0x0000004f, - GL2C_PERF_SEL_COMPRESSED_READ_64_REQ__GFX103PLUSEXCLUSIVE = 0x00000050, - 
GL2C_PERF_SEL_COMPRESSED_READ_96_REQ__GFX103PLUSEXCLUSIVE = 0x00000051, - GL2C_PERF_SEL_COMPRESSED_READ_128_REQ__GFX103PLUSEXCLUSIVE = 0x00000052, - GL2C_PERF_SEL_MC_WRREQ__GFX103PLUSEXCLUSIVE = 0x00000053, - GL2C_PERF_SEL_MC_WRREQ_STALL__GFX103PLUSEXCLUSIVE = 0x00000058, - GL2C_PERF_SEL_MC_WRREQ_LEVEL__GFX103PLUSEXCLUSIVE = 0x0000005d, - GL2C_PERF_SEL_MC_RDREQ__GFX103PLUSEXCLUSIVE = 0x00000060, - GL2C_PERF_SEL_MC_RDREQ_LEVEL__GFX103PLUSEXCLUSIVE = 0x0000006d, - GL2C_PERF_SEL_ONION_READ__GFX103PLUSEXCLUSIVE = 0x00000072, - GL2C_PERF_SEL_ONION_WRITE__GFX103PLUSEXCLUSIVE = 0x00000073, - GL2C_PERF_SEL_IO_READ__GFX103PLUSEXCLUSIVE = 0x00000074, - GL2C_PERF_SEL_IO_WRITE__GFX103PLUSEXCLUSIVE = 0x00000075, - GL2C_PERF_SEL_GARLIC_READ__GFX103PLUSEXCLUSIVE = 0x00000076, - GL2C_PERF_SEL_GARLIC_WRITE__GFX103PLUSEXCLUSIVE = 0x00000077, - GL2C_PERF_SEL_LATENCY_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x00000079, - GL2C_PERF_SEL_SRC_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x0000007a, - GL2C_PERF_SEL_TAG_STALL__GFX103PLUSEXCLUSIVE = 0x0000007b, - GL2C_PERF_SEL_TAG_WRITEBACK_FIFO_FULL_STALL__GFX103PLUSEXCLUSIVE = 0x0000007c, - GL2C_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL__GFX103PLUSEXCLUSIVE = 0x0000007d, - GL2C_PERF_SEL_TAG_UNCACHED_WRITE_ATOMIC_FIFO_FULL_STALL__GFX103PLUSEXCLUSIVE = 0x0000007e, - GL2C_PERF_SEL_TAG_NO_UNCACHED_WRITE_ATOMIC_ENTRIES_STALL__GFX103PLUSEXCLUSIVE = 0x0000007f, - GL2C_PERF_SEL_TAG_PROBE_STALL__GFX103PLUSEXCLUSIVE = 0x00000080, - GL2C_PERF_SEL_TAG_PROBE_FILTER_STALL__GFX103PLUSEXCLUSIVE = 0x00000081, - GL2C_PERF_SEL_TAG_PROBE_FIFO_FULL_STALL__GFX103PLUSEXCLUSIVE = 0x00000082, - GL2C_PERF_SEL_TAG_READ_DST_STALL__GFX103PLUSEXCLUSIVE = 0x00000083, - GL2C_PERF_SEL_READ_RETURN_TIMEOUT__GFX103PLUSEXCLUSIVE = 0x00000084, - GL2C_PERF_SEL_WRITEBACK_READ_TIMEOUT__GFX103PLUSEXCLUSIVE = 0x00000085, - GL2C_PERF_SEL_READ_RETURN_FULL_BUBBLE__GFX103PLUSEXCLUSIVE = 0x00000086, - GL2C_PERF_SEL_BUBBLE__GFX103PLUSEXCLUSIVE = 0x00000087, - GL2C_PERF_SEL_IB_REQ__GFX103PLUSEXCLUSIVE = 
0x00000088, - GL2C_PERF_SEL_IB_STALL__GFX103PLUSEXCLUSIVE = 0x00000089, - GL2C_PERF_SEL_IB_TAG_STALL__GFX103PLUSEXCLUSIVE = 0x0000008a, - GL2C_PERF_SEL_IB_CM_STALL__GFX103PLUSEXCLUSIVE = 0x0000008b, - GL2C_PERF_SEL_RETURN_ACK__GFX103PLUSEXCLUSIVE = 0x0000008c, - GL2C_PERF_SEL_RETURN_DATA__GFX103PLUSEXCLUSIVE = 0x0000008d, - GL2C_PERF_SEL_GL2A_LEVEL__GFX103PLUSEXCLUSIVE = 0x00000090, - GL2C_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION__GFX103PLUSEXCLUSIVE = 0x00000091, - GL2C_PERF_SEL_PROBE_FILTER_DISABLED__GFX103PLUSEXCLUSIVE = 0x00000092, - GL2C_PERF_SEL_ALL_TC_OP_WB_OR_INV_START__GFX103PLUSEXCLUSIVE = 0x00000093, - GL2C_PERF_SEL_ALL_TC_OP_WB_OR_INV_VOL_START__GFX103PLUSEXCLUSIVE = 0x00000094, - GL2C_PERF_SEL_GCR_INV__GFX103PLUSEXCLUSIVE = 0x00000095, - GL2C_PERF_SEL_GCR_WB__GFX103PLUSEXCLUSIVE = 0x00000096, - GL2C_PERF_SEL_GCR_DISCARD__GFX103PLUSEXCLUSIVE = 0x00000097, - GL2C_PERF_SEL_GCR_RANGE__GFX103PLUSEXCLUSIVE = 0x00000098, - GL2C_PERF_SEL_GCR_ALL__GFX103PLUSEXCLUSIVE = 0x00000099, - GL2C_PERF_SEL_GCR_VOL__GFX103PLUSEXCLUSIVE = 0x0000009a, - GL2C_PERF_SEL_GCR_UNSHARED__GFX103PLUSEXCLUSIVE = 0x0000009b, - GL2C_PERF_SEL_GCR_MDC_INV__GFX103PLUSEXCLUSIVE = 0x0000009c, - GL2C_PERF_SEL_GCR_GL2_INV_ALL__GFX103PLUSEXCLUSIVE = 0x0000009d, - GL2C_PERF_SEL_GCR_GL2_WB_ALL__GFX103PLUSEXCLUSIVE = 0x0000009e, - GL2C_PERF_SEL_GCR_MDC_INV_ALL__GFX103PLUSEXCLUSIVE = 0x0000009f, - GL2C_PERF_SEL_GCR_GL2_INV_RANGE__GFX103PLUSEXCLUSIVE = 0x000000a0, - GL2C_PERF_SEL_GCR_GL2_WB_RANGE__GFX103PLUSEXCLUSIVE = 0x000000a1, - GL2C_PERF_SEL_GCR_GL2_WB_INV_RANGE__GFX103PLUSEXCLUSIVE = 0x000000a2, - GL2C_PERF_SEL_GCR_MDC_INV_RANGE__GFX103PLUSEXCLUSIVE = 0x000000a3, - GL2C_PERF_SEL_ALL_GCR_INV_EVICT__GFX103PLUSEXCLUSIVE = 0x000000a4, - GL2C_PERF_SEL_ALL_GCR_INV_VOL_EVICT__GFX103PLUSEXCLUSIVE = 0x000000a5, - GL2C_PERF_SEL_ALL_GCR_WB_OR_INV_CYCLE__GFX103PLUSEXCLUSIVE = 0x000000a6, - GL2C_PERF_SEL_ALL_GCR_WB_OR_INV_VOL_CYCLE__GFX103PLUSEXCLUSIVE = 0x000000a7, - 
GL2C_PERF_SEL_ALL_GCR_WB_WRITEBACK__GFX103PLUSEXCLUSIVE = 0x000000a8, - GL2C_PERF_SEL_GCR_INVL2_VOL_CYCLE__GFX103PLUSEXCLUSIVE = 0x000000a9, - GL2C_PERF_SEL_GCR_INVL2_VOL_EVICT__GFX103PLUSEXCLUSIVE = 0x000000aa, - GL2C_PERF_SEL_GCR_INVL2_VOL_START__GFX103PLUSEXCLUSIVE = 0x000000ab, - GL2C_PERF_SEL_GCR_WBL2_VOL_CYCLE__GFX103PLUSEXCLUSIVE = 0x000000ac, - GL2C_PERF_SEL_GCR_WBL2_VOL_START__GFX103PLUSEXCLUSIVE = 0x000000ad, - GL2C_PERF_SEL_GCR_WBINVL2_CYCLE__GFX103PLUSEXCLUSIVE = 0x000000ae, - GL2C_PERF_SEL_GCR_WBINVL2_EVICT__GFX103PLUSEXCLUSIVE = 0x000000af, - GL2C_PERF_SEL_GCR_WBINVL2_START__GFX103PLUSEXCLUSIVE = 0x000000b0, - GL2C_PERF_SEL_MDC_INV_METADATA__GFX103PLUSEXCLUSIVE = 0x000000b1, - GL2C_PERF_SEL_MDC_REQ__GFX103PLUSEXCLUSIVE = 0x000000b2, - GL2C_PERF_SEL_MDC_LEVEL__GFX103PLUSEXCLUSIVE = 0x000000b3, - GL2C_PERF_SEL_MDC_TAG_HIT__GFX103PLUSEXCLUSIVE = 0x000000b4, - GL2C_PERF_SEL_MDC_SECTOR_HIT__GFX103PLUSEXCLUSIVE = 0x000000b5, - GL2C_PERF_SEL_MDC_SECTOR_MISS__GFX103PLUSEXCLUSIVE = 0x000000b6, - GL2C_PERF_SEL_MDC_TAG_STALL__GFX103PLUSEXCLUSIVE = 0x000000b7, - GL2C_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL__GFX103PLUSEXCLUSIVE = 0x000000b8, - GL2C_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL__GFX103PLUSEXCLUSIVE = 0x000000b9, - GL2C_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL__GFX103PLUSEXCLUSIVE = 0x000000ba, - GL2C_PERF_SEL_CM_CHANNEL0_REQ__GFX103PLUSEXCLUSIVE = 0x000000bb, - GL2C_PERF_SEL_CM_CHANNEL1_REQ__GFX103PLUSEXCLUSIVE = 0x000000bc, - GL2C_PERF_SEL_CM_CHANNEL2_REQ__GFX103PLUSEXCLUSIVE = 0x000000bd, - GL2C_PERF_SEL_CM_CHANNEL3_REQ__GFX103PLUSEXCLUSIVE = 0x000000be, - GL2C_PERF_SEL_CM_CHANNEL4_REQ__GFX103PLUSEXCLUSIVE = 0x000000bf, - GL2C_PERF_SEL_CM_CHANNEL5_REQ__GFX103PLUSEXCLUSIVE = 0x000000c0, - GL2C_PERF_SEL_CM_CHANNEL6_REQ__GFX103PLUSEXCLUSIVE = 0x000000c1, - GL2C_PERF_SEL_CM_CHANNEL7_REQ__GFX103PLUSEXCLUSIVE = 0x000000c2, - GL2C_PERF_SEL_CM_CHANNEL8_REQ__GFX103PLUSEXCLUSIVE = 0x000000c3, - 
GL2C_PERF_SEL_CM_CHANNEL9_REQ__GFX103PLUSEXCLUSIVE = 0x000000c4, - GL2C_PERF_SEL_CM_CHANNEL10_REQ__GFX103PLUSEXCLUSIVE = 0x000000c5, - GL2C_PERF_SEL_CM_CHANNEL11_REQ__GFX103PLUSEXCLUSIVE = 0x000000c6, - GL2C_PERF_SEL_CM_CHANNEL12_REQ__GFX103PLUSEXCLUSIVE = 0x000000c7, - GL2C_PERF_SEL_CM_CHANNEL13_REQ__GFX103PLUSEXCLUSIVE = 0x000000c8, - GL2C_PERF_SEL_CM_CHANNEL14_REQ__GFX103PLUSEXCLUSIVE = 0x000000c9, - GL2C_PERF_SEL_CM_CHANNEL15_REQ__GFX103PLUSEXCLUSIVE = 0x000000ca, - GL2C_PERF_SEL_CM_CHANNEL16_REQ__GFX103PLUSEXCLUSIVE = 0x000000cb, - GL2C_PERF_SEL_CM_CHANNEL17_REQ__GFX103PLUSEXCLUSIVE = 0x000000cc, - GL2C_PERF_SEL_CM_CHANNEL18_REQ__GFX103PLUSEXCLUSIVE = 0x000000cd, - GL2C_PERF_SEL_CM_CHANNEL19_REQ__GFX103PLUSEXCLUSIVE = 0x000000ce, - GL2C_PERF_SEL_CM_CHANNEL20_REQ__GFX103PLUSEXCLUSIVE = 0x000000cf, - GL2C_PERF_SEL_CM_CHANNEL21_REQ__GFX103PLUSEXCLUSIVE = 0x000000d0, - GL2C_PERF_SEL_CM_CHANNEL22_REQ__GFX103PLUSEXCLUSIVE = 0x000000d1, - GL2C_PERF_SEL_CM_CHANNEL23_REQ__GFX103PLUSEXCLUSIVE = 0x000000d2, - GL2C_PERF_SEL_CM_CHANNEL24_REQ__GFX103PLUSEXCLUSIVE = 0x000000d3, - GL2C_PERF_SEL_CM_CHANNEL25_REQ__GFX103PLUSEXCLUSIVE = 0x000000d4, - GL2C_PERF_SEL_CM_CHANNEL26_REQ__GFX103PLUSEXCLUSIVE = 0x000000d5, - GL2C_PERF_SEL_CM_CHANNEL27_REQ__GFX103PLUSEXCLUSIVE = 0x000000d6, - GL2C_PERF_SEL_CM_CHANNEL28_REQ__GFX103PLUSEXCLUSIVE = 0x000000d7, - GL2C_PERF_SEL_CM_CHANNEL29_REQ__GFX103PLUSEXCLUSIVE = 0x000000d8, - GL2C_PERF_SEL_CM_CHANNEL30_REQ__GFX103PLUSEXCLUSIVE = 0x000000d9, - GL2C_PERF_SEL_CM_CHANNEL31_REQ__GFX103PLUSEXCLUSIVE = 0x000000da, - GL2C_PERF_SEL_CM_COMP_ATOMIC_COLOR_REQ__GFX103PLUSEXCLUSIVE = 0x000000db, - GL2C_PERF_SEL_CM_COMP_ATOMIC_DEPTH16_REQ__GFX103PLUSEXCLUSIVE = 0x000000dc, - GL2C_PERF_SEL_CM_COMP_ATOMIC_DEPTH32_REQ__GFX103PLUSEXCLUSIVE = 0x000000dd, - GL2C_PERF_SEL_CM_COMP_ATOMIC_STENCIL_REQ__GFX103PLUSEXCLUSIVE = 0x000000de, - GL2C_PERF_SEL_CM_COMP_WRITE_COLOR_REQ__GFX103PLUSEXCLUSIVE = 0x000000df, - 
GL2C_PERF_SEL_CM_COMP_WRITE_DEPTH16_REQ__GFX103PLUSEXCLUSIVE = 0x000000e0, - GL2C_PERF_SEL_CM_COMP_WRITE_DEPTH32_REQ__GFX103PLUSEXCLUSIVE = 0x000000e1, - GL2C_PERF_SEL_CM_COMP_WRITE_STENCIL_REQ__GFX103PLUSEXCLUSIVE = 0x000000e2, - GL2C_PERF_SEL_CM_COMP_READ_REQ__GFX103PLUSEXCLUSIVE = 0x000000e3, - GL2C_PERF_SEL_CM_READ_BACK_REQ__GFX103PLUSEXCLUSIVE = 0x000000e4, - GL2C_PERF_SEL_CM_METADATA_WR_REQ__GFX103PLUSEXCLUSIVE = 0x000000e5, - GL2C_PERF_SEL_CM_WR_ACK_REQ__GFX103PLUSEXCLUSIVE = 0x000000e6, - GL2C_PERF_SEL_CM_NO_ACK_REQ__GFX103PLUSEXCLUSIVE = 0x000000e7, - GL2C_PERF_SEL_CM_NOOP_REQ__GFX103PLUSEXCLUSIVE = 0x000000e8, - GL2C_PERF_SEL_CM_COMP_COLOR_EN_REQ__GFX103PLUSEXCLUSIVE = 0x000000e9, - GL2C_PERF_SEL_CM_COMP_COLOR_DIS_REQ__GFX103PLUSEXCLUSIVE = 0x000000ea, - GL2C_PERF_SEL_CM_COMP_STENCIL_REQ__GFX103PLUSEXCLUSIVE = 0x000000eb, - GL2C_PERF_SEL_CM_COMP_DEPTH16_REQ__GFX103PLUSEXCLUSIVE = 0x000000ec, - GL2C_PERF_SEL_CM_COMP_DEPTH32_REQ__GFX103PLUSEXCLUSIVE = 0x000000ed, - GL2C_PERF_SEL_CM_COMP_RB_SKIP_REQ__GFX103PLUSEXCLUSIVE = 0x000000ee, - GL2C_PERF_SEL_CM_COLOR_32B_WR_REQ__GFX103PLUSEXCLUSIVE = 0x000000ef, - GL2C_PERF_SEL_CM_COLOR_64B_WR_REQ__GFX103PLUSEXCLUSIVE = 0x000000f0, - GL2C_PERF_SEL_CM_FULL_WRITE_REQ__GFX103PLUSEXCLUSIVE = 0x000000f1, - GL2C_PERF_SEL_CM_RVF_FULL__GFX103PLUSEXCLUSIVE = 0x000000f2, - GL2C_PERF_SEL_CM_SDR_FULL__GFX103PLUSEXCLUSIVE = 0x000000f3, - GL2C_PERF_SEL_CM_MERGE_BUF_FULL__GFX103PLUSEXCLUSIVE = 0x000000f4, - GL2C_PERF_SEL_CM_DCC_STALL__GFX103PLUSEXCLUSIVE = 0x000000f5, - GL2C_PERF_SEL_CM_DCC_IN_XFC__GFX103PLUSEXCLUSIVE = 0x000000f6, - GL2C_PERF_SEL_CM_DCC_OUT_XFC__GFX103PLUSEXCLUSIVE = 0x000000f7, - GL2C_PERF_SEL_CM_DCC_OUT_1x1__GFX103PLUSEXCLUSIVE = 0x000000f8, - GL2C_PERF_SEL_CM_DCC_OUT_1x2__GFX103PLUSEXCLUSIVE = 0x000000f9, - GL2C_PERF_SEL_CM_DCC_OUT_2x1__GFX103PLUSEXCLUSIVE = 0x000000fa, - GL2C_PERF_SEL_CM_DCC_OUT_2x2__GFX103PLUSEXCLUSIVE = 0x000000fb, - GL2C_PERF_SEL_CM_DCC_OUT_UNCOMP__GFX103PLUSEXCLUSIVE = 0x000000fc, -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GL2C_PERF_SEL_CM_DCC_OUT_CONST2SINGLE__GFX11 = 0x000000fd, - GL2C_PERF_SEL_CM_DCC_OUT_CONST2CLEAR__GFX11 = 0x000000fe, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT16__GFX11 = 0x000000ff, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT17__GFX11 = 0x00000100, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT18__GFX11 = 0x00000101, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11 = 0x00000102, -#endif -} GL2C_PERF_SEL; - -constexpr unsigned int MaxGl2cPerfSelGfx101 = GL2C_PERF_SEL_CM_DCC_STALL__GFX101; -constexpr unsigned int MaxGl2cPerfSelGfx103 = GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGl2cPerfSelGfx11 = GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11; -#endif - -typedef enum GL2_CACHE_POLICIES { - GL2_CACHE_POLICY_LRU = 0x00000000, - GL2_CACHE_POLICY_STREAM = 0x00000001, - GL2_CACHE_POLICY_NOA = 0x00000002, - GL2_CACHE_POLICY_BYPASS = 0x00000003, -} GL2_CACHE_POLICIES; - -typedef enum GRBM_PERF_SEL { - GRBM_PERF_SEL_COUNT = 0x00000000, - GRBM_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_PERF_SEL_GUI_ACTIVE = 0x00000002, - GRBM_PERF_SEL_CP_BUSY = 0x00000003, - GRBM_PERF_SEL_CP_COHER_BUSY = 0x00000004, - GRBM_PERF_SEL_CP_DMA_BUSY = 0x00000005, - GRBM_PERF_SEL_CB_BUSY = 0x00000006, - GRBM_PERF_SEL_DB_BUSY = 0x00000007, - GRBM_PERF_SEL_PA_BUSY = 0x00000008, - GRBM_PERF_SEL_SC_BUSY = 0x00000009, - GRBM_PERF_SEL_SPI_BUSY = 0x0000000b, - GRBM_PERF_SEL_SX_BUSY = 0x0000000c, - GRBM_PERF_SEL_TA_BUSY = 0x0000000d, - GRBM_PERF_SEL_CB_CLEAN = 0x0000000e, - GRBM_PERF_SEL_DB_CLEAN = 0x0000000f, - GRBM_PERF_SEL_GDS_BUSY = 0x00000019, - GRBM_PERF_SEL_BCI_BUSY = 0x0000001a, - GRBM_PERF_SEL_RLC_BUSY = 0x0000001b, - GRBM_PERF_SEL_CPG_BUSY = 0x0000001d, - GRBM_PERF_SEL_CPC_BUSY = 0x0000001e, - GRBM_PERF_SEL_CPF_BUSY = 0x0000001f, - GRBM_PERF_SEL_UTCL2_BUSY = 0x00000022, - GRBM_PERF_SEL_RMI_BUSY = 0x00000024, - 
GRBM_PERF_SEL_CPAXI_BUSY = 0x00000025, - GRBM_PERF_SEL_EA_BUSY__CORE = 0x00000023, - GRBM_PERF_SEL_RESERVED_6__GFX09 = 0x0000000a, - GRBM_PERF_SEL_VGT_BUSY__GFX09 = 0x00000011, - GRBM_PERF_SEL_IA_BUSY__GFX09 = 0x00000017, - GRBM_PERF_SEL_IA_NO_DMA_BUSY__GFX09 = 0x00000018, - GRBM_PERF_SEL_TC_BUSY__GFX09 = 0x0000001c, - GRBM_PERF_SEL_WD_BUSY__GFX09 = 0x00000020, - GRBM_PERF_SEL_WD_NO_DMA_BUSY__GFX09 = 0x00000021, - GRBM_PERF_SEL_RESERVED_5__GFX09_10 = 0x00000010, - GRBM_PERF_SEL_RESERVED_4__GFX09_10 = 0x00000012, - GRBM_PERF_SEL_RESERVED_3__GFX09_10 = 0x00000013, - GRBM_PERF_SEL_RESERVED_2__GFX09_10 = 0x00000014, - GRBM_PERF_SEL_RESERVED_1__GFX09_10 = 0x00000015, - GRBM_PERF_SEL_RESERVED_0__GFX09_10 = 0x00000016, - GRBM_PERF_SEL_RESERVED_9__GFX10 = 0x00000011, - GRBM_PERF_SEL_RESERVED_8__GFX10 = 0x00000017, - GRBM_PERF_SEL_RESERVED_7__GFX10 = 0x00000018, - GRBM_PERF_SEL_RESERVED_6__GFX10CORE = 0x0000000a, - GRBM_PERF_SEL_PMM_BUSY__GFX10COREPLUS = 0x0000002c, - GRBM_PERF_SEL_GUS_BUSY__GFX10COREPLUS = 0x0000002d, - GRBM_PERF_SEL_TCP_BUSY__GFX10PLUS = 0x0000001c, - GRBM_PERF_SEL_GE_BUSY__GFX10PLUS = 0x00000020, - GRBM_PERF_SEL_GE_NO_DMA_BUSY__GFX10PLUS = 0x00000021, - GRBM_PERF_SEL_UTCL1_BUSY__GFX10PLUS = 0x00000027, - GRBM_PERF_SEL_GL2CC_BUSY__GFX10PLUS = 0x00000028, - GRBM_PERF_SEL_SDMA_BUSY__GFX10PLUS = 0x00000029, - GRBM_PERF_SEL_CH_BUSY__GFX10PLUS = 0x0000002a, - GRBM_PERF_SEL_PH_BUSY__GFX10PLUS = 0x0000002b, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS = 0x0000002e, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GRBM_PERF_SEL_ANY_ACTIVE_F_BUSY__GFX11 = 0x0000002f, - GRBM_PERF_SEL_GL1H_BUSY__GFX11 = 0x00000030, - GRBM_PERF_SEL_PC_BUSY__GFX11 = 0x00000031, -#endif - GRBM_PERF_SEL_RSVD_BUSY__RAPHAEL = 0x00000026, -} GRBM_PERF_SEL; - -constexpr unsigned int MaxGrbmPerfSelGfx09 = GRBM_PERF_SEL_CPAXI_BUSY; -constexpr unsigned int MaxGrbmPerfSelGfx10 = GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| 
CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGrbmPerfSelGfx11 = GRBM_PERF_SEL_PC_BUSY__GFX11; -#endif - -typedef enum GRBM_SE0_PERF_SEL { - GRBM_SE0_PERF_SEL_COUNT = 0x00000000, - GRBM_SE0_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE0_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE0_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE0_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE0_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE0_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE0_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE0_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE0_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE0_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE0_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE0_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE0_PERF_SEL_VGT_BUSY__GFX09 = 0x0000000d, - GRBM_SE0_PERF_SEL_RESERVED_1__GFX09_10 = 0x00000005, - GRBM_SE0_PERF_SEL_RESERVED_0__GFX09_10 = 0x0000000b, - GRBM_SE0_PERF_SEL_RESERVED_2__GFX10 = 0x0000000d, - GRBM_SE0_PERF_SEL_UTCL1_BUSY__GFX10PLUS = 0x00000010, - GRBM_SE0_PERF_SEL_TCP_BUSY__GFX10PLUS = 0x00000011, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS = 0x00000012, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GRBM_SE0_PERF_SEL_GL1H_BUSY__GFX11 = 0x00000013, - GRBM_SE0_PERF_SEL_PC_BUSY__GFX11 = 0x00000014, -#endif -} GRBM_SE0_PERF_SEL; - -constexpr unsigned int MaxGrbmSe0PerfSelGfx09 = GRBM_SE0_PERF_SEL_RMI_BUSY; -constexpr unsigned int MaxGrbmSe0PerfSelGfx10 = GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGrbmSe0PerfSelGfx11 = GRBM_SE0_PERF_SEL_PC_BUSY__GFX11; -#endif - -typedef enum GRBM_SE1_PERF_SEL { - GRBM_SE1_PERF_SEL_COUNT = 0x00000000, - GRBM_SE1_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE1_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE1_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE1_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE1_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE1_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE1_PERF_SEL_TA_BUSY = 
0x00000008, - GRBM_SE1_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE1_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE1_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE1_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE1_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE1_PERF_SEL_RESERVED_1__GFX09 = 0x00000005, - GRBM_SE1_PERF_SEL_RESERVED_0__GFX09 = 0x0000000b, - GRBM_SE1_PERF_SEL_VGT_BUSY__GFX09 = 0x0000000d, - GRBM_SE1_PERF_SEL_RESERVED_1__GFX10CORE = 0x00000005, - GRBM_SE1_PERF_SEL_RESERVED_0__GFX10CORE = 0x0000000b, - GRBM_SE1_PERF_SEL_RESERVED_2__GFX10CORE = 0x0000000d, - GRBM_SE1_PERF_SEL_UTCL1_BUSY__GFX10COREPLUS = 0x00000010, - GRBM_SE1_PERF_SEL_TCP_BUSY__GFX10COREPLUS = 0x00000011, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS = 0x00000012, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GRBM_SE1_PERF_SEL_GL1H_BUSY__GFX11 = 0x00000013, - GRBM_SE1_PERF_SEL_PC_BUSY__GFX11 = 0x00000014, -#endif -} GRBM_SE1_PERF_SEL; - -constexpr unsigned int MaxGrbmSe1PerfSelGfx09 = GRBM_SE1_PERF_SEL_RMI_BUSY; -constexpr unsigned int MaxGrbmSe1PerfSelGfx10Core = GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGrbmSe1PerfSelGfx11 = GRBM_SE1_PERF_SEL_PC_BUSY__GFX11; -#endif - -typedef enum GRBM_SE2_PERF_SEL { - GRBM_SE2_PERF_SEL_COUNT = 0x00000000, - GRBM_SE2_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE2_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE2_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE2_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE2_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE2_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE2_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE2_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE2_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE2_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE2_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE2_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE2_PERF_SEL_RESERVED_1__GFX09 = 0x00000005, - GRBM_SE2_PERF_SEL_RESERVED_0__GFX09 = 0x0000000b, - 
GRBM_SE2_PERF_SEL_VGT_BUSY__GFX09 = 0x0000000d, - GRBM_SE2_PERF_SEL_RESERVED_1__GFX10CORE = 0x00000005, - GRBM_SE2_PERF_SEL_RESERVED_0__GFX10CORE = 0x0000000b, - GRBM_SE2_PERF_SEL_RESERVED_2__GFX10CORE = 0x0000000d, - GRBM_SE2_PERF_SEL_UTCL1_BUSY__GFX10COREPLUS = 0x00000010, - GRBM_SE2_PERF_SEL_TCP_BUSY__GFX10COREPLUS = 0x00000011, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS = 0x00000012, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GRBM_SE2_PERF_SEL_GL1H_BUSY__GFX11 = 0x00000013, - GRBM_SE2_PERF_SEL_PC_BUSY__GFX11 = 0x00000014, -#endif -} GRBM_SE2_PERF_SEL; - -constexpr unsigned int MaxGrbmSe2PerfSelGfx09 = GRBM_SE2_PERF_SEL_RMI_BUSY; -constexpr unsigned int MaxGrbmSe2PerfSelGfx10Core = GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGrbmSe2PerfSelGfx11 = GRBM_SE2_PERF_SEL_PC_BUSY__GFX11; -#endif - -typedef enum GRBM_SE3_PERF_SEL { - GRBM_SE3_PERF_SEL_COUNT = 0x00000000, - GRBM_SE3_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE3_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE3_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE3_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE3_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE3_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE3_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE3_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE3_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE3_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE3_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE3_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE3_PERF_SEL_RESERVED_1__GFX09 = 0x00000005, - GRBM_SE3_PERF_SEL_RESERVED_0__GFX09 = 0x0000000b, - GRBM_SE3_PERF_SEL_VGT_BUSY__GFX09 = 0x0000000d, - GRBM_SE3_PERF_SEL_RESERVED_1__GFX10CORE = 0x00000005, - GRBM_SE3_PERF_SEL_RESERVED_0__GFX10CORE = 0x0000000b, - GRBM_SE3_PERF_SEL_RESERVED_2__GFX10CORE = 0x0000000d, - GRBM_SE3_PERF_SEL_UTCL1_BUSY__GFX10COREPLUS = 0x00000010, - GRBM_SE3_PERF_SEL_TCP_BUSY__GFX10COREPLUS = 0x00000011, - 
GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS = 0x00000012, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GRBM_SE3_PERF_SEL_GL1H_BUSY__GFX11 = 0x00000013, - GRBM_SE3_PERF_SEL_PC_BUSY__GFX11 = 0x00000014, -#endif -} GRBM_SE3_PERF_SEL; - -constexpr unsigned int MaxGrbmSe3PerfSelGfx09 = GRBM_SE3_PERF_SEL_RMI_BUSY; -constexpr unsigned int MaxGrbmSe3PerfSelGfx10Core = GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxGrbmSe3PerfSelGfx11 = GRBM_SE3_PERF_SEL_PC_BUSY__GFX11; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum GRBM_SE4_PERF_SEL { - GRBM_SE4_PERF_SEL_COUNT = 0x00000000, - GRBM_SE4_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE4_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE4_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE4_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE4_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE4_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE4_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE4_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE4_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE4_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE4_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE4_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE4_PERF_SEL_UTCL1_BUSY = 0x00000010, - GRBM_SE4_PERF_SEL_TCP_BUSY = 0x00000011, - GRBM_SE4_PERF_SEL_GL1CC_BUSY = 0x00000012, - GRBM_SE4_PERF_SEL_GL1H_BUSY = 0x00000013, - GRBM_SE4_PERF_SEL_PC_BUSY = 0x00000014, -} GRBM_SE4_PERF_SEL; - -constexpr unsigned int MaxGrbmSe4PerfSel = GRBM_SE4_PERF_SEL_PC_BUSY; - -typedef enum GRBM_SE5_PERF_SEL { - GRBM_SE5_PERF_SEL_COUNT = 0x00000000, - GRBM_SE5_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE5_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE5_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE5_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE5_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE5_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE5_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE5_PERF_SEL_CB_CLEAN = 
0x00000009, - GRBM_SE5_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE5_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE5_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE5_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE5_PERF_SEL_UTCL1_BUSY = 0x00000010, - GRBM_SE5_PERF_SEL_TCP_BUSY = 0x00000011, - GRBM_SE5_PERF_SEL_GL1CC_BUSY = 0x00000012, - GRBM_SE5_PERF_SEL_GL1H_BUSY = 0x00000013, - GRBM_SE5_PERF_SEL_PC_BUSY = 0x00000014, -} GRBM_SE5_PERF_SEL; - -constexpr unsigned int MaxGrbmSe5PerfSel = GRBM_SE5_PERF_SEL_PC_BUSY; - -typedef enum GRBM_SE6_PERF_SEL { - GRBM_SE6_PERF_SEL_COUNT = 0x00000000, - GRBM_SE6_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE6_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE6_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE6_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE6_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE6_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE6_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE6_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE6_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE6_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE6_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE6_PERF_SEL_RMI_BUSY = 0x0000000f, - GRBM_SE6_PERF_SEL_UTCL1_BUSY = 0x00000010, - GRBM_SE6_PERF_SEL_TCP_BUSY = 0x00000011, - GRBM_SE6_PERF_SEL_GL1CC_BUSY = 0x00000012, - GRBM_SE6_PERF_SEL_GL1H_BUSY = 0x00000013, - GRBM_SE6_PERF_SEL_PC_BUSY = 0x00000014, -} GRBM_SE6_PERF_SEL; - -constexpr unsigned int MaxGrbmSe6PerfSel = GRBM_SE6_PERF_SEL_PC_BUSY; - -typedef enum GRBM_SE7_PERF_SEL { - GRBM_SE7_PERF_SEL_COUNT = 0x00000000, - GRBM_SE7_PERF_SEL_USER_DEFINED = 0x00000001, - GRBM_SE7_PERF_SEL_CB_BUSY = 0x00000002, - GRBM_SE7_PERF_SEL_DB_BUSY = 0x00000003, - GRBM_SE7_PERF_SEL_SC_BUSY = 0x00000004, - GRBM_SE7_PERF_SEL_SPI_BUSY = 0x00000006, - GRBM_SE7_PERF_SEL_SX_BUSY = 0x00000007, - GRBM_SE7_PERF_SEL_TA_BUSY = 0x00000008, - GRBM_SE7_PERF_SEL_CB_CLEAN = 0x00000009, - GRBM_SE7_PERF_SEL_DB_CLEAN = 0x0000000a, - GRBM_SE7_PERF_SEL_PA_BUSY = 0x0000000c, - GRBM_SE7_PERF_SEL_BCI_BUSY = 0x0000000e, - GRBM_SE7_PERF_SEL_RMI_BUSY = 0x0000000f, - 
GRBM_SE7_PERF_SEL_UTCL1_BUSY = 0x00000010, - GRBM_SE7_PERF_SEL_TCP_BUSY = 0x00000011, - GRBM_SE7_PERF_SEL_GL1CC_BUSY = 0x00000012, - GRBM_SE7_PERF_SEL_GL1H_BUSY = 0x00000013, - GRBM_SE7_PERF_SEL_PC_BUSY = 0x00000014, -} GRBM_SE7_PERF_SEL; - -constexpr unsigned int MaxGrbmSe7PerfSel = GRBM_SE7_PERF_SEL_PC_BUSY; -#endif - -typedef enum IA_PERFCOUNT_SELECT { - ia_perf_grp_input_event_window_active = 0x00000000, - ia_perf_dma_data_fifo_full = 0x00000001, - ia_perf_UNUSED2 = 0x00000002, - ia_perf_UNUSED3 = 0x00000003, - ia_perf_UNUSED4 = 0x00000004, - ia_perf_UNUSED5 = 0x00000005, - ia_perf_UNUSED6 = 0x00000006, - ia_perf_MC_LAT_BIN_0 = 0x00000007, - ia_perf_MC_LAT_BIN_1 = 0x00000008, - ia_perf_MC_LAT_BIN_2 = 0x00000009, - ia_perf_MC_LAT_BIN_3 = 0x0000000a, - ia_perf_MC_LAT_BIN_4 = 0x0000000b, - ia_perf_MC_LAT_BIN_5 = 0x0000000c, - ia_perf_MC_LAT_BIN_6 = 0x0000000d, - ia_perf_MC_LAT_BIN_7 = 0x0000000e, - ia_perf_ia_busy = 0x0000000f, - ia_perf_sclk_reg_vld_event = 0x00000010, - ia_perf_sclk_input_vld = 0x00000011, - ia_perf_sclk_core_vld = 0x00000012, - ia_perf_sclk_inval_vld = 0x00000013, - ia_perf_ia_dma_return = 0x00000014, - ia_perf_IA_STALLED = 0x00000015, - ia_perf_shift_starved_pipe0_event = 0x00000016, - ia_perf_shift_starved_pipe1_event = 0x00000017, - ia_perf_utcl1_translation_miss_event = 0x00000018, - ia_perf_utcl1_translation_hit_event = 0x00000019, - ia_perf_utcl1_stall_event = 0x0000001a, - ia_perf_utcl1_retry_event = 0x0000001b, - ia_perf_utcl1_consecutive_retry_event = 0x0000001c, - ia_perf_utcl1_request_event_1__GFX09_0 = 0x0000001d, - ia_perf_utcl1_request_event_0__GFX09_0 = 0x0000001e, - ia_perf_utcl1_stall_utcl2_event__GFX09_0 = 0x0000001f, - ia_perf_utcl1_request_event__GFX09_1X = 0x0000001d, - ia_perf_utcl1_stall_utcl2_event__GFX09_1X = 0x0000001e, -} IA_PERFCOUNT_SELECT; - -constexpr unsigned int MaxIaPerfcountSelectGfx09_0 = ia_perf_utcl1_stall_utcl2_event__GFX09_0; -constexpr unsigned int MaxIaPerfcountSelectGfx09_1x = 
ia_perf_utcl1_stall_utcl2_event__GFX09_1X; - -typedef enum IMG_DATA_FORMAT { - IMG_DATA_FORMAT_INVALID = 0x00000000, - IMG_DATA_FORMAT_8 = 0x00000001, - IMG_DATA_FORMAT_16 = 0x00000002, - IMG_DATA_FORMAT_8_8 = 0x00000003, - IMG_DATA_FORMAT_32 = 0x00000004, - IMG_DATA_FORMAT_16_16 = 0x00000005, - IMG_DATA_FORMAT_10_11_11 = 0x00000006, - IMG_DATA_FORMAT_11_11_10 = 0x00000007, - IMG_DATA_FORMAT_10_10_10_2 = 0x00000008, - IMG_DATA_FORMAT_2_10_10_10 = 0x00000009, - IMG_DATA_FORMAT_8_8_8_8 = 0x0000000a, - IMG_DATA_FORMAT_32_32 = 0x0000000b, - IMG_DATA_FORMAT_16_16_16_16 = 0x0000000c, - IMG_DATA_FORMAT_32_32_32 = 0x0000000d, - IMG_DATA_FORMAT_32_32_32_32 = 0x0000000e, - IMG_DATA_FORMAT_RESERVED_15 = 0x0000000f, - IMG_DATA_FORMAT_5_6_5 = 0x00000010, - IMG_DATA_FORMAT_1_5_5_5 = 0x00000011, - IMG_DATA_FORMAT_5_5_5_1 = 0x00000012, - IMG_DATA_FORMAT_4_4_4_4 = 0x00000013, - IMG_DATA_FORMAT_8_24 = 0x00000014, - IMG_DATA_FORMAT_24_8 = 0x00000015, - IMG_DATA_FORMAT_X24_8_32 = 0x00000016, - IMG_DATA_FORMAT_6E4__CORE = 0x0000001f, - IMG_DATA_FORMAT_8_AS_8_8_8_8__GFX09 = 0x00000017, - IMG_DATA_FORMAT_ETC2_RGB__GFX09 = 0x00000018, - IMG_DATA_FORMAT_ETC2_RGBA__GFX09 = 0x00000019, - IMG_DATA_FORMAT_ETC2_R__GFX09 = 0x0000001a, - IMG_DATA_FORMAT_ETC2_RG__GFX09 = 0x0000001b, - IMG_DATA_FORMAT_ETC2_RGBA1__GFX09 = 0x0000001c, - IMG_DATA_FORMAT_RESERVED_29__GFX09 = 0x0000001d, - IMG_DATA_FORMAT_RESERVED_30__GFX09 = 0x0000001e, - IMG_DATA_FORMAT_GB_GR__GFX09 = 0x00000020, - IMG_DATA_FORMAT_BG_RG__GFX09 = 0x00000021, - IMG_DATA_FORMAT_5_9_9_9__GFX09 = 0x00000022, - IMG_DATA_FORMAT_BC1__GFX09 = 0x00000023, - IMG_DATA_FORMAT_BC2__GFX09 = 0x00000024, - IMG_DATA_FORMAT_BC3__GFX09 = 0x00000025, - IMG_DATA_FORMAT_BC4__GFX09 = 0x00000026, - IMG_DATA_FORMAT_BC5__GFX09 = 0x00000027, - IMG_DATA_FORMAT_BC6__GFX09 = 0x00000028, - IMG_DATA_FORMAT_BC7__GFX09 = 0x00000029, - IMG_DATA_FORMAT_16_AS_32_32__GFX09 = 0x0000002a, - IMG_DATA_FORMAT_16_AS_16_16_16_16__GFX09 = 0x0000002b, - 
IMG_DATA_FORMAT_16_AS_32_32_32_32__GFX09 = 0x0000002c, - IMG_DATA_FORMAT_FMASK__GFX09 = 0x0000002d, - IMG_DATA_FORMAT_ASTC_2D_LDR__GFX09 = 0x0000002e, - IMG_DATA_FORMAT_ASTC_2D_HDR__GFX09 = 0x0000002f, - IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB__GFX09 = 0x00000030, - IMG_DATA_FORMAT_ASTC_3D_LDR__GFX09 = 0x00000031, - IMG_DATA_FORMAT_ASTC_3D_HDR__GFX09 = 0x00000032, - IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB__GFX09 = 0x00000033, - IMG_DATA_FORMAT_N_IN_16__GFX09 = 0x00000034, - IMG_DATA_FORMAT_N_IN_16_16__GFX09 = 0x00000035, - IMG_DATA_FORMAT_N_IN_16_16_16_16__GFX09 = 0x00000036, - IMG_DATA_FORMAT_N_IN_16_AS_16_16_16_16__GFX09 = 0x00000037, - IMG_DATA_FORMAT_RESERVED_56__GFX09 = 0x00000038, - IMG_DATA_FORMAT_4_4__GFX09 = 0x00000039, - IMG_DATA_FORMAT_6_5_5__GFX09 = 0x0000003a, - IMG_DATA_FORMAT_S8_16__GFX09 = 0x0000003b, - IMG_DATA_FORMAT_S8_32__GFX09 = 0x0000003c, - IMG_DATA_FORMAT_8_AS_32__GFX09 = 0x0000003d, - IMG_DATA_FORMAT_8_AS_32_32__GFX09 = 0x0000003e, - IMG_DATA_FORMAT_32_AS_32_32_32_32__GFX09 = 0x0000003f, - IMG_DATA_FORMAT_RESERVED_24__GFX101 = 0x00000018, - IMG_DATA_FORMAT_RESERVED_25__GFX101 = 0x00000019, - IMG_DATA_FORMAT_RESERVED_26__GFX101 = 0x0000001a, - IMG_DATA_FORMAT_RESERVED_27__GFX101 = 0x0000001b, - IMG_DATA_FORMAT_RESERVED_28__GFX101 = 0x0000001c, - IMG_DATA_FORMAT_RESERVED_29__GFX101 = 0x0000001d, - IMG_DATA_FORMAT_RESERVED_30__GFX101 = 0x0000001e, - IMG_DATA_FORMAT_RESERVED_61__GFX101 = 0x0000003d, - IMG_DATA_FORMAT_RESERVED_86__GFX101 = 0x00000056, - IMG_DATA_FORMAT_RESERVED_87__GFX101 = 0x00000057, - IMG_DATA_FORMAT_RESERVED_88__GFX101 = 0x00000058, - IMG_DATA_FORMAT_YCBCR__GFX103 = 0x0000001d, - IMG_DATA_FORMAT_LOD_5P3_USCALED__GFX103 = 0x0000003d, - IMG_DATA_FORMAT_7E3__GFX103COREPLUS = 0x0000001e, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - IMG_DATA_FORMAT_RESERVED_44__GFX104PLUS = 0x0000002c, - IMG_DATA_FORMAT_RESERVED_45__GFX104PLUS = 0x0000002d, - IMG_DATA_FORMAT_RESERVED_46__GFX104PLUS = 0x0000002e, - 
IMG_DATA_FORMAT_YCBCR__GFX104PLUS = 0x0000002f, - IMG_DATA_FORMAT_MM_8__GFX104PLUS = 0x00000031, - IMG_DATA_FORMAT_MM_8_8__GFX104PLUS = 0x00000032, -#endif - IMG_DATA_FORMAT_GB_GR__GFX10CORE = 0x00000020, - IMG_DATA_FORMAT_BG_RG__GFX10CORE = 0x00000021, - IMG_DATA_FORMAT_5_9_9_9__GFX10CORE = 0x00000022, - IMG_DATA_FORMAT_BC1__GFX10CORE = 0x00000023, - IMG_DATA_FORMAT_BC2__GFX10CORE = 0x00000024, - IMG_DATA_FORMAT_BC3__GFX10CORE = 0x00000025, - IMG_DATA_FORMAT_BC4__GFX10CORE = 0x00000026, - IMG_DATA_FORMAT_BC5__GFX10CORE = 0x00000027, - IMG_DATA_FORMAT_BC6__GFX10CORE = 0x00000028, - IMG_DATA_FORMAT_BC7__GFX10CORE = 0x00000029, - IMG_DATA_FORMAT_FMASK8_S2_F1__GFX10CORE = 0x0000002c, - IMG_DATA_FORMAT_FMASK8_S4_F1__GFX10CORE = 0x0000002d, - IMG_DATA_FORMAT_FMASK8_S8_F1__GFX10CORE = 0x0000002e, - IMG_DATA_FORMAT_FMASK8_S2_F2__GFX10CORE = 0x0000002f, - IMG_DATA_FORMAT_FMASK8_S4_F2__GFX10CORE = 0x00000030, - IMG_DATA_FORMAT_FMASK8_S4_F4__GFX10CORE = 0x00000031, - IMG_DATA_FORMAT_FMASK16_S16_F1__GFX10CORE = 0x00000032, - IMG_DATA_FORMAT_FMASK16_S8_F2__GFX10CORE = 0x00000033, - IMG_DATA_FORMAT_FMASK32_S16_F2__GFX10CORE = 0x00000034, - IMG_DATA_FORMAT_FMASK32_S8_F4__GFX10CORE = 0x00000035, - IMG_DATA_FORMAT_FMASK32_S8_F8__GFX10CORE = 0x00000036, - IMG_DATA_FORMAT_FMASK64_S16_F4__GFX10CORE = 0x00000037, - IMG_DATA_FORMAT_FMASK64_S16_F8__GFX10CORE = 0x00000038, - IMG_DATA_FORMAT_4_4__GFX10CORE = 0x00000039, - IMG_DATA_FORMAT_6_5_5__GFX10CORE = 0x0000003a, - IMG_DATA_FORMAT_1__GFX10CORE = 0x0000003b, - IMG_DATA_FORMAT_1_REVERSED__GFX10CORE = 0x0000003c, - IMG_DATA_FORMAT_32_AS_32_32_32_32__GFX10CORE = 0x0000003f, - IMG_DATA_FORMAT_ETC2_RGB__GFX10CORE = 0x00000040, - IMG_DATA_FORMAT_ETC2_RGBA__GFX10CORE = 0x00000041, - IMG_DATA_FORMAT_ETC2_R__GFX10CORE = 0x00000042, - IMG_DATA_FORMAT_ETC2_RG__GFX10CORE = 0x00000043, - IMG_DATA_FORMAT_ETC2_RGBA1__GFX10CORE = 0x00000044, - IMG_DATA_FORMAT_ASTC_2D_LDR__GFX10CORE = 0x00000045, - IMG_DATA_FORMAT_ASTC_2D_HDR__GFX10CORE = 0x00000046, 
- IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB__GFX10CORE = 0x00000047, - IMG_DATA_FORMAT_ASTC_3D_LDR__GFX10CORE = 0x00000048, - IMG_DATA_FORMAT_ASTC_3D_HDR__GFX10CORE = 0x00000049, - IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB__GFX10CORE = 0x0000004a, - IMG_DATA_FORMAT_RESERVED_75__GFX10CORE = 0x0000004b, - IMG_DATA_FORMAT_MM_8__GFX10CORE = 0x0000004c, - IMG_DATA_FORMAT_MM_8_8__GFX10CORE = 0x0000004d, - IMG_DATA_FORMAT_MM_8_8_8_8__GFX10CORE = 0x0000004e, - IMG_DATA_FORMAT_MM_VYUY8__GFX10CORE = 0x0000004f, - IMG_DATA_FORMAT_MM_10_11_11__GFX10CORE = 0x00000050, - IMG_DATA_FORMAT_MM_2_10_10_10__GFX10CORE = 0x00000051, - IMG_DATA_FORMAT_MM_16_16_16_16__GFX10CORE = 0x00000052, - IMG_DATA_FORMAT_MM_10_IN_16__GFX10CORE = 0x00000053, - IMG_DATA_FORMAT_MM_10_IN_16_16__GFX10CORE = 0x00000054, - IMG_DATA_FORMAT_MM_10_IN_16_16_16_16__GFX10CORE = 0x00000055, - IMG_DATA_FORMAT_RESERVED_89__GFX10CORE = 0x00000059, - IMG_DATA_FORMAT_RESERVED_90__GFX10CORE = 0x0000005a, - IMG_DATA_FORMAT_RESERVED_91__GFX10CORE = 0x0000005b, - IMG_DATA_FORMAT_RESERVED_92__GFX10CORE = 0x0000005c, - IMG_DATA_FORMAT_RESERVED_93__GFX10CORE = 0x0000005d, - IMG_DATA_FORMAT_RESERVED_94__GFX10CORE = 0x0000005e, - IMG_DATA_FORMAT_RESERVED_95__GFX10CORE = 0x0000005f, - IMG_DATA_FORMAT_RESERVED_96__GFX10CORE = 0x00000060, - IMG_DATA_FORMAT_RESERVED_97__GFX10CORE = 0x00000061, - IMG_DATA_FORMAT_RESERVED_98__GFX10CORE = 0x00000062, - IMG_DATA_FORMAT_RESERVED_99__GFX10CORE = 0x00000063, - IMG_DATA_FORMAT_RESERVED_100__GFX10CORE = 0x00000064, - IMG_DATA_FORMAT_RESERVED_101__GFX10CORE = 0x00000065, - IMG_DATA_FORMAT_RESERVED_102__GFX10CORE = 0x00000066, - IMG_DATA_FORMAT_RESERVED_103__GFX10CORE = 0x00000067, - IMG_DATA_FORMAT_RESERVED_104__GFX10CORE = 0x00000068, - IMG_DATA_FORMAT_RESERVED_105__GFX10CORE = 0x00000069, - IMG_DATA_FORMAT_RESERVED_106__GFX10CORE = 0x0000006a, - IMG_DATA_FORMAT_RESERVED_107__GFX10CORE = 0x0000006b, - IMG_DATA_FORMAT_RESERVED_108__GFX10CORE = 0x0000006c, - IMG_DATA_FORMAT_RESERVED_109__GFX10CORE = 
0x0000006d, - IMG_DATA_FORMAT_RESERVED_110__GFX10CORE = 0x0000006e, - IMG_DATA_FORMAT_RESERVED_111__GFX10CORE = 0x0000006f, - IMG_DATA_FORMAT_RESERVED_112__GFX10CORE = 0x00000070, - IMG_DATA_FORMAT_RESERVED_113__GFX10CORE = 0x00000071, - IMG_DATA_FORMAT_RESERVED_114__GFX10CORE = 0x00000072, - IMG_DATA_FORMAT_RESERVED_115__GFX10CORE = 0x00000073, - IMG_DATA_FORMAT_RESERVED_116__GFX10CORE = 0x00000074, - IMG_DATA_FORMAT_RESERVED_117__GFX10CORE = 0x00000075, - IMG_DATA_FORMAT_RESERVED_118__GFX10CORE = 0x00000076, - IMG_DATA_FORMAT_RESERVED_119__GFX10CORE = 0x00000077, - IMG_DATA_FORMAT_RESERVED_120__GFX10CORE = 0x00000078, - IMG_DATA_FORMAT_RESERVED_121__GFX10CORE = 0x00000079, - IMG_DATA_FORMAT_RESERVED_122__GFX10CORE = 0x0000007a, - IMG_DATA_FORMAT_RESERVED_123__GFX10CORE = 0x0000007b, - IMG_DATA_FORMAT_RESERVED_124__GFX10CORE = 0x0000007c, - IMG_DATA_FORMAT_RESERVED_125__GFX10CORE = 0x0000007d, - IMG_DATA_FORMAT_RESERVED_126__GFX10CORE = 0x0000007e, - IMG_DATA_FORMAT_RESERVED_127__GFX10CORE = 0x0000007f, - IMG_DATA_FORMAT_RESERVED_23__GFX10COREPLUS = 0x00000017, - IMG_DATA_FORMAT_RESERVED_42__GFX10COREPLUS = 0x0000002a, - IMG_DATA_FORMAT_RESERVED_62__GFX10COREPLUS = 0x0000003e, - IMG_DATA_FORMAT_RESERVED_43__GFX10PLUS = 0x0000002b, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - IMG_DATA_FORMAT_5_9_9_9__GFX11 = 0x00000018, - IMG_DATA_FORMAT_GB_GR__GFX11 = 0x00000019, - IMG_DATA_FORMAT_BG_RG__GFX11 = 0x0000001a, - IMG_DATA_FORMAT_4_4__GFX11 = 0x0000001b, - IMG_DATA_FORMAT_1__GFX11 = 0x0000001c, - IMG_DATA_FORMAT_1_REVERSED__GFX11 = 0x0000001d, - IMG_DATA_FORMAT_BC1__GFX11 = 0x00000020, - IMG_DATA_FORMAT_BC2__GFX11 = 0x00000021, - IMG_DATA_FORMAT_BC3__GFX11 = 0x00000022, - IMG_DATA_FORMAT_BC4__GFX11 = 0x00000023, - IMG_DATA_FORMAT_BC5__GFX11 = 0x00000024, - IMG_DATA_FORMAT_BC6__GFX11 = 0x00000025, - IMG_DATA_FORMAT_BC7__GFX11 = 0x00000026, - IMG_DATA_FORMAT_RESERVED_39__GFX11 = 0x00000027, - IMG_DATA_FORMAT_RESERVED_40__GFX11 = 
0x00000028, - IMG_DATA_FORMAT_RESERVED_41__GFX11 = 0x00000029, - IMG_DATA_FORMAT_LOD_5P3_USCALED__GFX11 = 0x00000030, - IMG_DATA_FORMAT_MM_8_8_8_8__GFX11 = 0x00000033, - IMG_DATA_FORMAT_MM_VYUY8__GFX11 = 0x00000034, - IMG_DATA_FORMAT_MM_10_11_11__GFX11 = 0x00000035, - IMG_DATA_FORMAT_MM_2_10_10_10__GFX11 = 0x00000036, - IMG_DATA_FORMAT_MM_10_IN_16__GFX11 = 0x00000037, - IMG_DATA_FORMAT_MM_10_IN_16_16__GFX11 = 0x00000038, - IMG_DATA_FORMAT_MM_10_IN_16_16_16_16__GFX11 = 0x00000039, - IMG_DATA_FORMAT_MM_12_IN_16__GFX11 = 0x0000003a, - IMG_DATA_FORMAT_MM_12_IN_16_16__GFX11 = 0x0000003b, - IMG_DATA_FORMAT_MM_12_IN_16_16_16_16__GFX11 = 0x0000003c, - IMG_DATA_FORMAT_MM_16_16_16_16__GFX11 = 0x0000003d, - IMG_DATA_FORMAT_RESERVED_63__GFX11 = 0x0000003f, -#endif -#if CHIP_HDR_NAVI21 - IMG_DATA_FORMAT_RESERVED_24__NV21 = 0x00000018, - IMG_DATA_FORMAT_RESERVED_25__NV21 = 0x00000019, - IMG_DATA_FORMAT_RESERVED_26__NV21 = 0x0000001a, - IMG_DATA_FORMAT_RESERVED_27__NV21 = 0x0000001b, - IMG_DATA_FORMAT_RESERVED_28__NV21 = 0x0000001c, - IMG_DATA_FORMAT_MM_12_IN_16__NV21 = 0x00000056, - IMG_DATA_FORMAT_MM_12_IN_16_16__NV21 = 0x00000057, - IMG_DATA_FORMAT_MM_12_IN_16_16_16_16__NV21 = 0x00000058, -#endif -#if CHIP_HDR_NAVI22 - IMG_DATA_FORMAT_RESERVED_24__NV22 = 0x00000018, - IMG_DATA_FORMAT_RESERVED_25__NV22 = 0x00000019, - IMG_DATA_FORMAT_RESERVED_26__NV22 = 0x0000001a, - IMG_DATA_FORMAT_RESERVED_27__NV22 = 0x0000001b, - IMG_DATA_FORMAT_RESERVED_28__NV22 = 0x0000001c, - IMG_DATA_FORMAT_MM_12_IN_16__NV22 = 0x00000056, - IMG_DATA_FORMAT_MM_12_IN_16_16__NV22 = 0x00000057, - IMG_DATA_FORMAT_MM_12_IN_16_16_16_16__NV22 = 0x00000058, -#endif -#if CHIP_HDR_NAVI23 - IMG_DATA_FORMAT_RESERVED_24__NV23 = 0x00000018, - IMG_DATA_FORMAT_RESERVED_25__NV23 = 0x00000019, - IMG_DATA_FORMAT_RESERVED_26__NV23 = 0x0000001a, - IMG_DATA_FORMAT_RESERVED_27__NV23 = 0x0000001b, - IMG_DATA_FORMAT_RESERVED_28__NV23 = 0x0000001c, - IMG_DATA_FORMAT_MM_12_IN_16__NV23 = 0x00000056, - 
IMG_DATA_FORMAT_MM_12_IN_16_16__NV23 = 0x00000057, - IMG_DATA_FORMAT_MM_12_IN_16_16_16_16__NV23 = 0x00000058, -#endif -#if CHIP_HDR_NAVI24 - IMG_DATA_FORMAT_RESERVED_24__NV24 = 0x00000018, - IMG_DATA_FORMAT_RESERVED_25__NV24 = 0x00000019, - IMG_DATA_FORMAT_RESERVED_26__NV24 = 0x0000001a, - IMG_DATA_FORMAT_RESERVED_27__NV24 = 0x0000001b, - IMG_DATA_FORMAT_RESERVED_28__NV24 = 0x0000001c, - IMG_DATA_FORMAT_MM_12_IN_16__NV24 = 0x00000056, - IMG_DATA_FORMAT_MM_12_IN_16_16__NV24 = 0x00000057, - IMG_DATA_FORMAT_MM_12_IN_16_16_16_16__NV24 = 0x00000058, -#endif -#if CHIP_HDR_NAVI32 - IMG_DATA_FORMAT_FMASK8_S2_F2__NV32 = 0x0000003e, - IMG_DATA_FORMAT_FMASK8_S4_F4__NV32 = 0x0000003f, - IMG_DATA_FORMAT_FMASK8_S2_F1__NV32 = 0x00000040, - IMG_DATA_FORMAT_FMASK8_S4_F1__NV32 = 0x00000041, - IMG_DATA_FORMAT_FMASK8_S8_F1__NV32 = 0x00000042, - IMG_DATA_FORMAT_FMASK8_S4_F2__NV32 = 0x00000043, - IMG_DATA_FORMAT_FMASK16_S16_F1__NV32 = 0x00000044, - IMG_DATA_FORMAT_FMASK16_S8_F2__NV32 = 0x00000045, - IMG_DATA_FORMAT_FMASK32_S16_F2__NV32 = 0x00000046, - IMG_DATA_FORMAT_FMASK32_S8_F4__NV32 = 0x00000047, - IMG_DATA_FORMAT_FMASK64_S16_F4__NV32 = 0x00000048, - IMG_DATA_FORMAT_FMASK64_S16_F8__NV32 = 0x00000049, - IMG_DATA_FORMAT_FMASK32_S8_F8__NV32 = 0x0000004a, - IMG_DATA_FORMAT_RESERVED_75__NV32 = 0x0000004b, - IMG_DATA_FORMAT_RESERVED_76__NV32 = 0x0000004c, - IMG_DATA_FORMAT_RESERVED_77__NV32 = 0x0000004d, - IMG_DATA_FORMAT_RESERVED_78__NV32 = 0x0000004e, - IMG_DATA_FORMAT_RESERVED_79__NV32 = 0x0000004f, - IMG_DATA_FORMAT_RESERVED_80__NV32 = 0x00000050, - IMG_DATA_FORMAT_RESERVED_81__NV32 = 0x00000051, - IMG_DATA_FORMAT_RESERVED_82__NV32 = 0x00000052, - IMG_DATA_FORMAT_RESERVED_83__NV32 = 0x00000053, - IMG_DATA_FORMAT_RESERVED_84__NV32 = 0x00000054, - IMG_DATA_FORMAT_RESERVED_85__NV32 = 0x00000055, - IMG_DATA_FORMAT_RESERVED_86__NV32 = 0x00000056, - IMG_DATA_FORMAT_RESERVED_87__NV32 = 0x00000057, - IMG_DATA_FORMAT_RESERVED_88__NV32 = 0x00000058, - IMG_DATA_FORMAT_RESERVED_89__NV32 = 
0x00000059, - IMG_DATA_FORMAT_RESERVED_90__NV32 = 0x0000005a, - IMG_DATA_FORMAT_RESERVED_91__NV32 = 0x0000005b, - IMG_DATA_FORMAT_RESERVED_92__NV32 = 0x0000005c, - IMG_DATA_FORMAT_RESERVED_93__NV32 = 0x0000005d, - IMG_DATA_FORMAT_RESERVED_94__NV32 = 0x0000005e, - IMG_DATA_FORMAT_RESERVED_95__NV32 = 0x0000005f, - IMG_DATA_FORMAT_RESERVED_96__NV32 = 0x00000060, - IMG_DATA_FORMAT_RESERVED_97__NV32 = 0x00000061, - IMG_DATA_FORMAT_RESERVED_98__NV32 = 0x00000062, - IMG_DATA_FORMAT_RESERVED_99__NV32 = 0x00000063, - IMG_DATA_FORMAT_RESERVED_100__NV32 = 0x00000064, - IMG_DATA_FORMAT_RESERVED_101__NV32 = 0x00000065, - IMG_DATA_FORMAT_RESERVED_102__NV32 = 0x00000066, - IMG_DATA_FORMAT_RESERVED_103__NV32 = 0x00000067, - IMG_DATA_FORMAT_RESERVED_104__NV32 = 0x00000068, - IMG_DATA_FORMAT_RESERVED_105__NV32 = 0x00000069, - IMG_DATA_FORMAT_RESERVED_106__NV32 = 0x0000006a, - IMG_DATA_FORMAT_RESERVED_107__NV32 = 0x0000006b, - IMG_DATA_FORMAT_RESERVED_108__NV32 = 0x0000006c, - IMG_DATA_FORMAT_RESERVED_109__NV32 = 0x0000006d, - IMG_DATA_FORMAT_RESERVED_110__NV32 = 0x0000006e, - IMG_DATA_FORMAT_RESERVED_111__NV32 = 0x0000006f, - IMG_DATA_FORMAT_RESERVED_112__NV32 = 0x00000070, - IMG_DATA_FORMAT_RESERVED_113__NV32 = 0x00000071, - IMG_DATA_FORMAT_RESERVED_114__NV32 = 0x00000072, - IMG_DATA_FORMAT_RESERVED_115__NV32 = 0x00000073, - IMG_DATA_FORMAT_RESERVED_116__NV32 = 0x00000074, - IMG_DATA_FORMAT_RESERVED_117__NV32 = 0x00000075, - IMG_DATA_FORMAT_RESERVED_118__NV32 = 0x00000076, - IMG_DATA_FORMAT_RESERVED_119__NV32 = 0x00000077, - IMG_DATA_FORMAT_RESERVED_120__NV32 = 0x00000078, - IMG_DATA_FORMAT_RESERVED_121__NV32 = 0x00000079, - IMG_DATA_FORMAT_RESERVED_122__NV32 = 0x0000007a, - IMG_DATA_FORMAT_RESERVED_123__NV32 = 0x0000007b, - IMG_DATA_FORMAT_RESERVED_124__NV32 = 0x0000007c, - IMG_DATA_FORMAT_RESERVED_125__NV32 = 0x0000007d, - IMG_DATA_FORMAT_RESERVED_126__NV32 = 0x0000007e, - IMG_DATA_FORMAT_RESERVED_127__NV32 = 0x0000007f, -#endif -#if CHIP_HDR_NAVI33 - 
IMG_DATA_FORMAT_FMASK8_S2_F2__NV33 = 0x0000003e, - IMG_DATA_FORMAT_FMASK8_S4_F4__NV33 = 0x0000003f, - IMG_DATA_FORMAT_FMASK8_S2_F1__NV33 = 0x00000040, - IMG_DATA_FORMAT_FMASK8_S4_F1__NV33 = 0x00000041, - IMG_DATA_FORMAT_FMASK8_S8_F1__NV33 = 0x00000042, - IMG_DATA_FORMAT_FMASK8_S4_F2__NV33 = 0x00000043, - IMG_DATA_FORMAT_FMASK16_S16_F1__NV33 = 0x00000044, - IMG_DATA_FORMAT_FMASK16_S8_F2__NV33 = 0x00000045, - IMG_DATA_FORMAT_FMASK32_S16_F2__NV33 = 0x00000046, - IMG_DATA_FORMAT_FMASK32_S8_F4__NV33 = 0x00000047, - IMG_DATA_FORMAT_FMASK64_S16_F4__NV33 = 0x00000048, - IMG_DATA_FORMAT_FMASK64_S16_F8__NV33 = 0x00000049, - IMG_DATA_FORMAT_FMASK32_S8_F8__NV33 = 0x0000004a, - IMG_DATA_FORMAT_RESERVED_75__NV33 = 0x0000004b, - IMG_DATA_FORMAT_RESERVED_76__NV33 = 0x0000004c, - IMG_DATA_FORMAT_RESERVED_77__NV33 = 0x0000004d, - IMG_DATA_FORMAT_RESERVED_78__NV33 = 0x0000004e, - IMG_DATA_FORMAT_RESERVED_79__NV33 = 0x0000004f, - IMG_DATA_FORMAT_RESERVED_80__NV33 = 0x00000050, - IMG_DATA_FORMAT_RESERVED_81__NV33 = 0x00000051, - IMG_DATA_FORMAT_RESERVED_82__NV33 = 0x00000052, - IMG_DATA_FORMAT_RESERVED_83__NV33 = 0x00000053, - IMG_DATA_FORMAT_RESERVED_84__NV33 = 0x00000054, - IMG_DATA_FORMAT_RESERVED_85__NV33 = 0x00000055, - IMG_DATA_FORMAT_RESERVED_86__NV33 = 0x00000056, - IMG_DATA_FORMAT_RESERVED_87__NV33 = 0x00000057, - IMG_DATA_FORMAT_RESERVED_88__NV33 = 0x00000058, - IMG_DATA_FORMAT_RESERVED_89__NV33 = 0x00000059, - IMG_DATA_FORMAT_RESERVED_90__NV33 = 0x0000005a, - IMG_DATA_FORMAT_RESERVED_91__NV33 = 0x0000005b, - IMG_DATA_FORMAT_RESERVED_92__NV33 = 0x0000005c, - IMG_DATA_FORMAT_RESERVED_93__NV33 = 0x0000005d, - IMG_DATA_FORMAT_RESERVED_94__NV33 = 0x0000005e, - IMG_DATA_FORMAT_RESERVED_95__NV33 = 0x0000005f, - IMG_DATA_FORMAT_RESERVED_96__NV33 = 0x00000060, - IMG_DATA_FORMAT_RESERVED_97__NV33 = 0x00000061, - IMG_DATA_FORMAT_RESERVED_98__NV33 = 0x00000062, - IMG_DATA_FORMAT_RESERVED_99__NV33 = 0x00000063, - IMG_DATA_FORMAT_RESERVED_100__NV33 = 0x00000064, - 
IMG_DATA_FORMAT_RESERVED_101__NV33 = 0x00000065, - IMG_DATA_FORMAT_RESERVED_102__NV33 = 0x00000066, - IMG_DATA_FORMAT_RESERVED_103__NV33 = 0x00000067, - IMG_DATA_FORMAT_RESERVED_104__NV33 = 0x00000068, - IMG_DATA_FORMAT_RESERVED_105__NV33 = 0x00000069, - IMG_DATA_FORMAT_RESERVED_106__NV33 = 0x0000006a, - IMG_DATA_FORMAT_RESERVED_107__NV33 = 0x0000006b, - IMG_DATA_FORMAT_RESERVED_108__NV33 = 0x0000006c, - IMG_DATA_FORMAT_RESERVED_109__NV33 = 0x0000006d, - IMG_DATA_FORMAT_RESERVED_110__NV33 = 0x0000006e, - IMG_DATA_FORMAT_RESERVED_111__NV33 = 0x0000006f, - IMG_DATA_FORMAT_RESERVED_112__NV33 = 0x00000070, - IMG_DATA_FORMAT_RESERVED_113__NV33 = 0x00000071, - IMG_DATA_FORMAT_RESERVED_114__NV33 = 0x00000072, - IMG_DATA_FORMAT_RESERVED_115__NV33 = 0x00000073, - IMG_DATA_FORMAT_RESERVED_116__NV33 = 0x00000074, - IMG_DATA_FORMAT_RESERVED_117__NV33 = 0x00000075, - IMG_DATA_FORMAT_RESERVED_118__NV33 = 0x00000076, - IMG_DATA_FORMAT_RESERVED_119__NV33 = 0x00000077, - IMG_DATA_FORMAT_RESERVED_120__NV33 = 0x00000078, - IMG_DATA_FORMAT_RESERVED_121__NV33 = 0x00000079, - IMG_DATA_FORMAT_RESERVED_122__NV33 = 0x0000007a, - IMG_DATA_FORMAT_RESERVED_123__NV33 = 0x0000007b, - IMG_DATA_FORMAT_RESERVED_124__NV33 = 0x0000007c, - IMG_DATA_FORMAT_RESERVED_125__NV33 = 0x0000007d, - IMG_DATA_FORMAT_RESERVED_126__NV33 = 0x0000007e, - IMG_DATA_FORMAT_RESERVED_127__NV33 = 0x0000007f, -#endif - IMG_DATA_FORMAT_DXT3A__RAPHAEL = 0x00000018, - IMG_DATA_FORMAT_DXT3A_AS_1_1_1_1__RAPHAEL = 0x00000019, - IMG_DATA_FORMAT_DXT5A__RAPHAEL = 0x0000001a, - IMG_DATA_FORMAT_DXN__RAPHAEL = 0x0000001b, - IMG_DATA_FORMAT_CTX1__RAPHAEL = 0x0000001c, - IMG_DATA_FORMAT_RESERVED_86__RAPHAEL = 0x00000056, - IMG_DATA_FORMAT_RESERVED_87__RAPHAEL = 0x00000057, - IMG_DATA_FORMAT_RESERVED_88__RAPHAEL = 0x00000058, - IMG_DATA_FORMAT_DXT3A__REMBRANDT = 0x00000018, - IMG_DATA_FORMAT_DXT3A_AS_1_1_1_1__REMBRANDT = 0x00000019, - IMG_DATA_FORMAT_DXT5A__REMBRANDT = 0x0000001a, - IMG_DATA_FORMAT_DXN__REMBRANDT = 0x0000001b, - 
IMG_DATA_FORMAT_CTX1__REMBRANDT = 0x0000001c, - IMG_DATA_FORMAT_RESERVED_86__REMBRANDT = 0x00000056, - IMG_DATA_FORMAT_RESERVED_87__REMBRANDT = 0x00000057, - IMG_DATA_FORMAT_RESERVED_88__REMBRANDT = 0x00000058, -} IMG_DATA_FORMAT; - -typedef enum IMG_FMT { - IMG_FMT_INVALID = 0x00000000, - IMG_FMT_8_UNORM = 0x00000001, - IMG_FMT_8_SNORM = 0x00000002, - IMG_FMT_8_USCALED = 0x00000003, - IMG_FMT_8_SSCALED = 0x00000004, - IMG_FMT_8_UINT = 0x00000005, - IMG_FMT_8_SINT = 0x00000006, - IMG_FMT_16_UNORM = 0x00000007, - IMG_FMT_16_SNORM = 0x00000008, - IMG_FMT_16_USCALED = 0x00000009, - IMG_FMT_16_SSCALED = 0x0000000a, - IMG_FMT_16_UINT = 0x0000000b, - IMG_FMT_16_SINT = 0x0000000c, - IMG_FMT_16_FLOAT = 0x0000000d, - IMG_FMT_8_8_UNORM = 0x0000000e, - IMG_FMT_8_8_SNORM = 0x0000000f, - IMG_FMT_8_8_USCALED = 0x00000010, - IMG_FMT_8_8_SSCALED = 0x00000011, - IMG_FMT_8_8_UINT = 0x00000012, - IMG_FMT_8_8_SINT = 0x00000013, - IMG_FMT_32_UINT = 0x00000014, - IMG_FMT_32_SINT = 0x00000015, - IMG_FMT_32_FLOAT = 0x00000016, - IMG_FMT_16_16_UNORM = 0x00000017, - IMG_FMT_16_16_SNORM = 0x00000018, - IMG_FMT_16_16_USCALED = 0x00000019, - IMG_FMT_16_16_SSCALED = 0x0000001a, - IMG_FMT_16_16_UINT = 0x0000001b, - IMG_FMT_16_16_SINT = 0x0000001c, - IMG_FMT_16_16_FLOAT = 0x0000001d, - IMG_FMT_RESERVED_98 = 0x00000062, - IMG_FMT_RESERVED_99 = 0x00000063, - IMG_FMT_RESERVED_100 = 0x00000064, - IMG_FMT_RESERVED_101 = 0x00000065, - IMG_FMT_RESERVED_102 = 0x00000066, - IMG_FMT_RESERVED_103 = 0x00000067, - IMG_FMT_RESERVED_104 = 0x00000068, - IMG_FMT_RESERVED_105 = 0x00000069, - IMG_FMT_RESERVED_106 = 0x0000006a, - IMG_FMT_RESERVED_107 = 0x0000006b, - IMG_FMT_RESERVED_108 = 0x0000006c, - IMG_FMT_RESERVED_155 = 0x0000009b, - IMG_FMT_RESERVED_115__GFX10 = 0x00000073, - IMG_FMT_RESERVED_116__GFX10 = 0x00000074, - IMG_FMT_RESERVED_117__GFX10 = 0x00000075, - IMG_FMT_RESERVED_118__GFX10 = 0x00000076, - IMG_FMT_RESERVED_119__GFX10 = 0x00000077, - IMG_FMT_RESERVED_120__GFX10 = 0x00000078, - 
IMG_FMT_RESERVED_121__GFX10 = 0x00000079, - IMG_FMT_RESERVED_122__GFX10 = 0x0000007a, - IMG_FMT_RESERVED_285__GFX101 = 0x0000011d, - IMG_FMT_RESERVED_286__GFX101 = 0x0000011e, - IMG_FMT_RESERVED_287__GFX101 = 0x0000011f, - IMG_FMT_RESERVED_288__GFX101 = 0x00000120, - IMG_FMT_RESERVED_289__GFX101 = 0x00000121, - IMG_FMT_RESERVED_290__GFX101 = 0x00000122, - IMG_FMT_RESERVED_291__GFX101 = 0x00000123, - IMG_FMT_RESERVED_292__GFX101 = 0x00000124, - IMG_FMT_RESERVED_293__GFX101 = 0x00000125, - IMG_FMT_7E3_FLOAT__GFX103 = 0x0000011d, - IMG_FMT_YCBCR_UNORM__GFX103 = 0x0000011e, - IMG_FMT_YCBCR_SRGB__GFX103 = 0x0000011f, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - IMG_FMT_10_11_11_FLOAT__GFX104PLUS = 0x0000001e, - IMG_FMT_11_11_10_FLOAT__GFX104PLUS = 0x0000001f, - IMG_FMT_10_10_10_2_UNORM__GFX104PLUS = 0x00000020, - IMG_FMT_10_10_10_2_SNORM__GFX104PLUS = 0x00000021, - IMG_FMT_10_10_10_2_UINT__GFX104PLUS = 0x00000022, - IMG_FMT_10_10_10_2_SINT__GFX104PLUS = 0x00000023, - IMG_FMT_2_10_10_10_UNORM__GFX104PLUS = 0x00000024, - IMG_FMT_2_10_10_10_SNORM__GFX104PLUS = 0x00000025, - IMG_FMT_2_10_10_10_USCALED__GFX104PLUS = 0x00000026, - IMG_FMT_2_10_10_10_SSCALED__GFX104PLUS = 0x00000027, - IMG_FMT_2_10_10_10_UINT__GFX104PLUS = 0x00000028, - IMG_FMT_2_10_10_10_SINT__GFX104PLUS = 0x00000029, - IMG_FMT_8_8_8_8_UNORM__GFX104PLUS = 0x0000002a, - IMG_FMT_8_8_8_8_SNORM__GFX104PLUS = 0x0000002b, - IMG_FMT_8_8_8_8_USCALED__GFX104PLUS = 0x0000002c, - IMG_FMT_8_8_8_8_SSCALED__GFX104PLUS = 0x0000002d, - IMG_FMT_8_8_8_8_UINT__GFX104PLUS = 0x0000002e, - IMG_FMT_8_8_8_8_SINT__GFX104PLUS = 0x0000002f, - IMG_FMT_32_32_UINT__GFX104PLUS = 0x00000030, - IMG_FMT_32_32_SINT__GFX104PLUS = 0x00000031, - IMG_FMT_32_32_FLOAT__GFX104PLUS = 0x00000032, - IMG_FMT_16_16_16_16_UNORM__GFX104PLUS = 0x00000033, - IMG_FMT_16_16_16_16_SNORM__GFX104PLUS = 0x00000034, - IMG_FMT_16_16_16_16_USCALED__GFX104PLUS = 0x00000035, - IMG_FMT_16_16_16_16_SSCALED__GFX104PLUS = 0x00000036, - 
IMG_FMT_16_16_16_16_UINT__GFX104PLUS = 0x00000037, - IMG_FMT_16_16_16_16_SINT__GFX104PLUS = 0x00000038, - IMG_FMT_16_16_16_16_FLOAT__GFX104PLUS = 0x00000039, - IMG_FMT_32_32_32_UINT__GFX104PLUS = 0x0000003a, - IMG_FMT_32_32_32_SINT__GFX104PLUS = 0x0000003b, - IMG_FMT_32_32_32_FLOAT__GFX104PLUS = 0x0000003c, - IMG_FMT_32_32_32_32_UINT__GFX104PLUS = 0x0000003d, - IMG_FMT_32_32_32_32_SINT__GFX104PLUS = 0x0000003e, - IMG_FMT_32_32_32_32_FLOAT__GFX104PLUS = 0x0000003f, - IMG_FMT_8_SRGB__GFX104PLUS = 0x00000040, - IMG_FMT_8_8_SRGB__GFX104PLUS = 0x00000041, - IMG_FMT_8_8_8_8_SRGB__GFX104PLUS = 0x00000042, - IMG_FMT_5_9_9_9_FLOAT__GFX104PLUS = 0x00000043, - IMG_FMT_5_6_5_UNORM__GFX104PLUS = 0x00000044, - IMG_FMT_1_5_5_5_UNORM__GFX104PLUS = 0x00000045, - IMG_FMT_5_5_5_1_UNORM__GFX104PLUS = 0x00000046, - IMG_FMT_4_4_4_4_UNORM__GFX104PLUS = 0x00000047, - IMG_FMT_4_4_UNORM__GFX104PLUS = 0x00000048, - IMG_FMT_1_UNORM__GFX104PLUS = 0x00000049, - IMG_FMT_1_REVERSED_UNORM__GFX104PLUS = 0x0000004a, - IMG_FMT_32_FLOAT_CLAMP__GFX104PLUS = 0x0000004b, - IMG_FMT_8_24_UNORM__GFX104PLUS = 0x0000004c, - IMG_FMT_8_24_UINT__GFX104PLUS = 0x0000004d, - IMG_FMT_24_8_UNORM__GFX104PLUS = 0x0000004e, - IMG_FMT_24_8_UINT__GFX104PLUS = 0x0000004f, - IMG_FMT_X24_8_32_UINT__GFX104PLUS = 0x00000050, - IMG_FMT_X24_8_32_FLOAT__GFX104PLUS = 0x00000051, - IMG_FMT_GB_GR_UNORM__GFX104PLUS = 0x00000052, - IMG_FMT_GB_GR_SNORM__GFX104PLUS = 0x00000053, - IMG_FMT_GB_GR_UINT__GFX104PLUS = 0x00000054, - IMG_FMT_GB_GR_SRGB__GFX104PLUS = 0x00000055, - IMG_FMT_BG_RG_UNORM__GFX104PLUS = 0x00000056, - IMG_FMT_BG_RG_SNORM__GFX104PLUS = 0x00000057, - IMG_FMT_BG_RG_UINT__GFX104PLUS = 0x00000058, - IMG_FMT_BG_RG_SRGB__GFX104PLUS = 0x00000059, - IMG_FMT_BC1_UNORM__GFX104PLUS = 0x0000006d, - IMG_FMT_BC1_SRGB__GFX104PLUS = 0x0000006e, - IMG_FMT_BC2_UNORM__GFX104PLUS = 0x0000006f, - IMG_FMT_BC2_SRGB__GFX104PLUS = 0x00000070, - IMG_FMT_BC3_UNORM__GFX104PLUS = 0x00000071, - IMG_FMT_BC3_SRGB__GFX104PLUS = 0x00000072, - 
IMG_FMT_RESERVED_147__GFX104PLUS = 0x00000093, - IMG_FMT_RESERVED_148__GFX104PLUS = 0x00000094, - IMG_FMT_RESERVED_149__GFX104PLUS = 0x00000095, - IMG_FMT_RESERVED_150__GFX104PLUS = 0x00000096, - IMG_FMT_RESERVED_151__GFX104PLUS = 0x00000097, - IMG_FMT_RESERVED_152__GFX104PLUS = 0x00000098, - IMG_FMT_RESERVED_153__GFX104PLUS = 0x00000099, - IMG_FMT_RESERVED_154__GFX104PLUS = 0x0000009a, - IMG_FMT_RESERVED_156__GFX104PLUS = 0x0000009c, - IMG_FMT_RESERVED_157__GFX104PLUS = 0x0000009d, - IMG_FMT_RESERVED_158__GFX104PLUS = 0x0000009e, - IMG_FMT_RESERVED_159__GFX104PLUS = 0x0000009f, - IMG_FMT_RESERVED_160__GFX104PLUS = 0x000000a0, - IMG_FMT_RESERVED_175__GFX104PLUS = 0x000000af, - IMG_FMT_RESERVED_176__GFX104PLUS = 0x000000b0, - IMG_FMT_RESERVED_177__GFX104PLUS = 0x000000b1, - IMG_FMT_RESERVED_178__GFX104PLUS = 0x000000b2, - IMG_FMT_RESERVED_179__GFX104PLUS = 0x000000b3, - IMG_FMT_RESERVED_180__GFX104PLUS = 0x000000b4, - IMG_FMT_RESERVED_181__GFX104PLUS = 0x000000b5, - IMG_FMT_RESERVED_182__GFX104PLUS = 0x000000b6, - IMG_FMT_RESERVED_183__GFX104PLUS = 0x000000b7, - IMG_FMT_RESERVED_184__GFX104PLUS = 0x000000b8, - IMG_FMT_RESERVED_185__GFX104PLUS = 0x000000b9, - IMG_FMT_RESERVED_186__GFX104PLUS = 0x000000ba, - IMG_FMT_RESERVED_187__GFX104PLUS = 0x000000bb, - IMG_FMT_RESERVED_188__GFX104PLUS = 0x000000bc, - IMG_FMT_RESERVED_189__GFX104PLUS = 0x000000bd, - IMG_FMT_RESERVED_190__GFX104PLUS = 0x000000be, - IMG_FMT_RESERVED_191__GFX104PLUS = 0x000000bf, - IMG_FMT_RESERVED_192__GFX104PLUS = 0x000000c0, - IMG_FMT_RESERVED_193__GFX104PLUS = 0x000000c1, - IMG_FMT_RESERVED_194__GFX104PLUS = 0x000000c2, - IMG_FMT_RESERVED_195__GFX104PLUS = 0x000000c3, - IMG_FMT_RESERVED_196__GFX104PLUS = 0x000000c4, - IMG_FMT_RESERVED_197__GFX104PLUS = 0x000000c5, - IMG_FMT_RESERVED_198__GFX104PLUS = 0x000000c6, - IMG_FMT_RESERVED_199__GFX104PLUS = 0x000000c7, - IMG_FMT_RESERVED_200__GFX104PLUS = 0x000000c8, - IMG_FMT_RESERVED_201__GFX104PLUS = 0x000000c9, - IMG_FMT_RESERVED_202__GFX104PLUS = 
0x000000ca, - IMG_FMT_RESERVED_203__GFX104PLUS = 0x000000cb, - IMG_FMT_RESERVED_204__GFX104PLUS = 0x000000cc, - IMG_FMT_YCBCR_UNORM__GFX104PLUS = 0x000000cd, - IMG_FMT_YCBCR_SRGB__GFX104PLUS = 0x000000ce, - IMG_FMT_MM_8_UNORM__GFX104PLUS = 0x000000cf, - IMG_FMT_MM_8_UINT__GFX104PLUS = 0x000000d0, - IMG_FMT_MM_8_8_UNORM__GFX104PLUS = 0x000000d1, - IMG_FMT_MM_8_8_UINT__GFX104PLUS = 0x000000d2, - IMG_FMT_RESERVED_229__GFX104PLUS = 0x000000e5, - IMG_FMT_RESERVED_230__GFX104PLUS = 0x000000e6, - IMG_FMT_RESERVED_231__GFX104PLUS = 0x000000e7, - IMG_FMT_RESERVED_232__GFX104PLUS = 0x000000e8, - IMG_FMT_RESERVED_233__GFX104PLUS = 0x000000e9, - IMG_FMT_RESERVED_234__GFX104PLUS = 0x000000ea, - IMG_FMT_RESERVED_235__GFX104PLUS = 0x000000eb, - IMG_FMT_RESERVED_236__GFX104PLUS = 0x000000ec, - IMG_FMT_RESERVED_237__GFX104PLUS = 0x000000ed, - IMG_FMT_RESERVED_238__GFX104PLUS = 0x000000ee, - IMG_FMT_RESERVED_239__GFX104PLUS = 0x000000ef, - IMG_FMT_RESERVED_240__GFX104PLUS = 0x000000f0, - IMG_FMT_RESERVED_241__GFX104PLUS = 0x000000f1, - IMG_FMT_RESERVED_242__GFX104PLUS = 0x000000f2, - IMG_FMT_RESERVED_243__GFX104PLUS = 0x000000f3, - IMG_FMT_RESERVED_244__GFX104PLUS = 0x000000f4, - IMG_FMT_RESERVED_245__GFX104PLUS = 0x000000f5, - IMG_FMT_RESERVED_246__GFX104PLUS = 0x000000f6, - IMG_FMT_RESERVED_247__GFX104PLUS = 0x000000f7, - IMG_FMT_RESERVED_248__GFX104PLUS = 0x000000f8, - IMG_FMT_RESERVED_249__GFX104PLUS = 0x000000f9, - IMG_FMT_RESERVED_250__GFX104PLUS = 0x000000fa, - IMG_FMT_RESERVED_251__GFX104PLUS = 0x000000fb, - IMG_FMT_RESERVED_252__GFX104PLUS = 0x000000fc, - IMG_FMT_RESERVED_253__GFX104PLUS = 0x000000fd, - IMG_FMT_RESERVED_254__GFX104PLUS = 0x000000fe, - IMG_FMT_RESERVED_255__GFX104PLUS = 0x000000ff, -#endif - IMG_FMT_10_11_11_UNORM__GFX10CORE = 0x0000001e, - IMG_FMT_10_11_11_SNORM__GFX10CORE = 0x0000001f, - IMG_FMT_10_11_11_USCALED__GFX10CORE = 0x00000020, - IMG_FMT_10_11_11_SSCALED__GFX10CORE = 0x00000021, - IMG_FMT_10_11_11_UINT__GFX10CORE = 0x00000022, - 
IMG_FMT_10_11_11_SINT__GFX10CORE = 0x00000023, - IMG_FMT_10_11_11_FLOAT__GFX10CORE = 0x00000024, - IMG_FMT_11_11_10_UNORM__GFX10CORE = 0x00000025, - IMG_FMT_11_11_10_SNORM__GFX10CORE = 0x00000026, - IMG_FMT_11_11_10_USCALED__GFX10CORE = 0x00000027, - IMG_FMT_11_11_10_SSCALED__GFX10CORE = 0x00000028, - IMG_FMT_11_11_10_UINT__GFX10CORE = 0x00000029, - IMG_FMT_11_11_10_SINT__GFX10CORE = 0x0000002a, - IMG_FMT_11_11_10_FLOAT__GFX10CORE = 0x0000002b, - IMG_FMT_10_10_10_2_UNORM__GFX10CORE = 0x0000002c, - IMG_FMT_10_10_10_2_SNORM__GFX10CORE = 0x0000002d, - IMG_FMT_10_10_10_2_USCALED__GFX10CORE = 0x0000002e, - IMG_FMT_10_10_10_2_SSCALED__GFX10CORE = 0x0000002f, - IMG_FMT_10_10_10_2_UINT__GFX10CORE = 0x00000030, - IMG_FMT_10_10_10_2_SINT__GFX10CORE = 0x00000031, - IMG_FMT_2_10_10_10_UNORM__GFX10CORE = 0x00000032, - IMG_FMT_2_10_10_10_SNORM__GFX10CORE = 0x00000033, - IMG_FMT_2_10_10_10_USCALED__GFX10CORE = 0x00000034, - IMG_FMT_2_10_10_10_SSCALED__GFX10CORE = 0x00000035, - IMG_FMT_2_10_10_10_UINT__GFX10CORE = 0x00000036, - IMG_FMT_2_10_10_10_SINT__GFX10CORE = 0x00000037, - IMG_FMT_8_8_8_8_UNORM__GFX10CORE = 0x00000038, - IMG_FMT_8_8_8_8_SNORM__GFX10CORE = 0x00000039, - IMG_FMT_8_8_8_8_USCALED__GFX10CORE = 0x0000003a, - IMG_FMT_8_8_8_8_SSCALED__GFX10CORE = 0x0000003b, - IMG_FMT_8_8_8_8_UINT__GFX10CORE = 0x0000003c, - IMG_FMT_8_8_8_8_SINT__GFX10CORE = 0x0000003d, - IMG_FMT_32_32_UINT__GFX10CORE = 0x0000003e, - IMG_FMT_32_32_SINT__GFX10CORE = 0x0000003f, - IMG_FMT_32_32_FLOAT__GFX10CORE = 0x00000040, - IMG_FMT_16_16_16_16_UNORM__GFX10CORE = 0x00000041, - IMG_FMT_16_16_16_16_SNORM__GFX10CORE = 0x00000042, - IMG_FMT_16_16_16_16_USCALED__GFX10CORE = 0x00000043, - IMG_FMT_16_16_16_16_SSCALED__GFX10CORE = 0x00000044, - IMG_FMT_16_16_16_16_UINT__GFX10CORE = 0x00000045, - IMG_FMT_16_16_16_16_SINT__GFX10CORE = 0x00000046, - IMG_FMT_16_16_16_16_FLOAT__GFX10CORE = 0x00000047, - IMG_FMT_32_32_32_UINT__GFX10CORE = 0x00000048, - IMG_FMT_32_32_32_SINT__GFX10CORE = 0x00000049, - 
IMG_FMT_32_32_32_FLOAT__GFX10CORE = 0x0000004a, - IMG_FMT_32_32_32_32_UINT__GFX10CORE = 0x0000004b, - IMG_FMT_32_32_32_32_SINT__GFX10CORE = 0x0000004c, - IMG_FMT_32_32_32_32_FLOAT__GFX10CORE = 0x0000004d, - IMG_FMT_RESERVED_78__GFX10CORE = 0x0000004e, - IMG_FMT_RESERVED_79__GFX10CORE = 0x0000004f, - IMG_FMT_RESERVED_80__GFX10CORE = 0x00000050, - IMG_FMT_RESERVED_81__GFX10CORE = 0x00000051, - IMG_FMT_RESERVED_82__GFX10CORE = 0x00000052, - IMG_FMT_RESERVED_83__GFX10CORE = 0x00000053, - IMG_FMT_RESERVED_84__GFX10CORE = 0x00000054, - IMG_FMT_RESERVED_85__GFX10CORE = 0x00000055, - IMG_FMT_RESERVED_86__GFX10CORE = 0x00000056, - IMG_FMT_RESERVED_87__GFX10CORE = 0x00000057, - IMG_FMT_RESERVED_88__GFX10CORE = 0x00000058, - IMG_FMT_RESERVED_89__GFX10CORE = 0x00000059, - IMG_FMT_RESERVED_90__GFX10CORE = 0x0000005a, - IMG_FMT_RESERVED_91__GFX10CORE = 0x0000005b, - IMG_FMT_RESERVED_92__GFX10CORE = 0x0000005c, - IMG_FMT_RESERVED_93__GFX10CORE = 0x0000005d, - IMG_FMT_RESERVED_94__GFX10CORE = 0x0000005e, - IMG_FMT_RESERVED_95__GFX10CORE = 0x0000005f, - IMG_FMT_RESERVED_109__GFX10CORE = 0x0000006d, - IMG_FMT_RESERVED_110__GFX10CORE = 0x0000006e, - IMG_FMT_RESERVED_111__GFX10CORE = 0x0000006f, - IMG_FMT_RESERVED_112__GFX10CORE = 0x00000070, - IMG_FMT_RESERVED_113__GFX10CORE = 0x00000071, - IMG_FMT_RESERVED_114__GFX10CORE = 0x00000072, - IMG_FMT_8_SRGB__GFX10CORE = 0x00000080, - IMG_FMT_8_8_SRGB__GFX10CORE = 0x00000081, - IMG_FMT_8_8_8_8_SRGB__GFX10CORE = 0x00000082, - IMG_FMT_6E4_FLOAT__GFX10CORE = 0x00000083, - IMG_FMT_5_9_9_9_FLOAT__GFX10CORE = 0x00000084, - IMG_FMT_5_6_5_UNORM__GFX10CORE = 0x00000085, - IMG_FMT_1_5_5_5_UNORM__GFX10CORE = 0x00000086, - IMG_FMT_5_5_5_1_UNORM__GFX10CORE = 0x00000087, - IMG_FMT_4_4_4_4_UNORM__GFX10CORE = 0x00000088, - IMG_FMT_4_4_UNORM__GFX10CORE = 0x00000089, - IMG_FMT_1_UNORM__GFX10CORE = 0x0000008a, - IMG_FMT_1_REVERSED_UNORM__GFX10CORE = 0x0000008b, - IMG_FMT_32_FLOAT_CLAMP__GFX10CORE = 0x0000008c, - IMG_FMT_8_24_UNORM__GFX10CORE = 0x0000008d, - 
IMG_FMT_8_24_UINT__GFX10CORE = 0x0000008e, - IMG_FMT_24_8_UNORM__GFX10CORE = 0x0000008f, - IMG_FMT_24_8_UINT__GFX10CORE = 0x00000090, - IMG_FMT_X24_8_32_UINT__GFX10CORE = 0x00000091, - IMG_FMT_X24_8_32_FLOAT__GFX10CORE = 0x00000092, - IMG_FMT_GB_GR_UNORM__GFX10CORE = 0x00000093, - IMG_FMT_GB_GR_SNORM__GFX10CORE = 0x00000094, - IMG_FMT_GB_GR_UINT__GFX10CORE = 0x00000095, - IMG_FMT_GB_GR_SRGB__GFX10CORE = 0x00000096, - IMG_FMT_BG_RG_UNORM__GFX10CORE = 0x00000097, - IMG_FMT_BG_RG_SNORM__GFX10CORE = 0x00000098, - IMG_FMT_BG_RG_UINT__GFX10CORE = 0x00000099, - IMG_FMT_BG_RG_SRGB__GFX10CORE = 0x0000009a, - IMG_FMT_FMASK8_S2_F1__GFX10CORE = 0x0000009c, - IMG_FMT_FMASK8_S4_F1__GFX10CORE = 0x0000009d, - IMG_FMT_FMASK8_S8_F1__GFX10CORE = 0x0000009e, - IMG_FMT_FMASK8_S2_F2__GFX10CORE = 0x0000009f, - IMG_FMT_FMASK8_S4_F2__GFX10CORE = 0x000000a0, - IMG_FMT_FMASK8_S4_F4__GFX10CORE = 0x000000a1, - IMG_FMT_FMASK16_S16_F1__GFX10CORE = 0x000000a2, - IMG_FMT_FMASK16_S8_F2__GFX10CORE = 0x000000a3, - IMG_FMT_FMASK32_S16_F2__GFX10CORE = 0x000000a4, - IMG_FMT_FMASK32_S8_F4__GFX10CORE = 0x000000a5, - IMG_FMT_FMASK32_S8_F8__GFX10CORE = 0x000000a6, - IMG_FMT_FMASK64_S16_F4__GFX10CORE = 0x000000a7, - IMG_FMT_FMASK64_S16_F8__GFX10CORE = 0x000000a8, - IMG_FMT_BC1_UNORM__GFX10CORE = 0x000000a9, - IMG_FMT_BC1_SRGB__GFX10CORE = 0x000000aa, - IMG_FMT_BC2_UNORM__GFX10CORE = 0x000000ab, - IMG_FMT_BC2_SRGB__GFX10CORE = 0x000000ac, - IMG_FMT_BC3_UNORM__GFX10CORE = 0x000000ad, - IMG_FMT_BC3_SRGB__GFX10CORE = 0x000000ae, - IMG_FMT_BC4_UNORM__GFX10CORE = 0x000000af, - IMG_FMT_BC4_SNORM__GFX10CORE = 0x000000b0, - IMG_FMT_BC5_UNORM__GFX10CORE = 0x000000b1, - IMG_FMT_BC5_SNORM__GFX10CORE = 0x000000b2, - IMG_FMT_BC6_UFLOAT__GFX10CORE = 0x000000b3, - IMG_FMT_BC6_SFLOAT__GFX10CORE = 0x000000b4, - IMG_FMT_BC7_UNORM__GFX10CORE = 0x000000b5, - IMG_FMT_BC7_SRGB__GFX10CORE = 0x000000b6, - IMG_FMT_ETC2_RGB_UNORM__GFX10CORE = 0x000000b7, - IMG_FMT_ETC2_RGB_SRGB__GFX10CORE = 0x000000b8, - 
IMG_FMT_ETC2_RGBA_UNORM__GFX10CORE = 0x000000b9, - IMG_FMT_ETC2_RGBA_SRGB__GFX10CORE = 0x000000ba, - IMG_FMT_ETC2_R_UNORM__GFX10CORE = 0x000000bb, - IMG_FMT_ETC2_R_SNORM__GFX10CORE = 0x000000bc, - IMG_FMT_ETC2_RG_UNORM__GFX10CORE = 0x000000bd, - IMG_FMT_ETC2_RG_SNORM__GFX10CORE = 0x000000be, - IMG_FMT_ETC2_RGBA1_UNORM__GFX10CORE = 0x000000bf, - IMG_FMT_ETC2_RGBA1_SRGB__GFX10CORE = 0x000000c0, - IMG_FMT_ASTC_2D_LDR_4X4__GFX10CORE = 0x000000c1, - IMG_FMT_ASTC_2D_LDR_5X4__GFX10CORE = 0x000000c2, - IMG_FMT_ASTC_2D_LDR_5X5__GFX10CORE = 0x000000c3, - IMG_FMT_ASTC_2D_LDR_6X5__GFX10CORE = 0x000000c4, - IMG_FMT_ASTC_2D_LDR_6X6__GFX10CORE = 0x000000c5, - IMG_FMT_ASTC_2D_LDR_8X5__GFX10CORE = 0x000000c6, - IMG_FMT_ASTC_2D_LDR_8X6__GFX10CORE = 0x000000c7, - IMG_FMT_ASTC_2D_LDR_8X8__GFX10CORE = 0x000000c8, - IMG_FMT_ASTC_2D_LDR_10X5__GFX10CORE = 0x000000c9, - IMG_FMT_ASTC_2D_LDR_10X6__GFX10CORE = 0x000000ca, - IMG_FMT_ASTC_2D_LDR_10X8__GFX10CORE = 0x000000cb, - IMG_FMT_ASTC_2D_LDR_10X10__GFX10CORE = 0x000000cc, - IMG_FMT_ASTC_2D_LDR_12X10__GFX10CORE = 0x000000cd, - IMG_FMT_ASTC_2D_LDR_12X12__GFX10CORE = 0x000000ce, - IMG_FMT_ASTC_2D_HDR_4X4__GFX10CORE = 0x000000cf, - IMG_FMT_ASTC_2D_HDR_5X4__GFX10CORE = 0x000000d0, - IMG_FMT_ASTC_2D_HDR_5X5__GFX10CORE = 0x000000d1, - IMG_FMT_ASTC_2D_HDR_6X5__GFX10CORE = 0x000000d2, - IMG_FMT_ASTC_2D_HDR_6X6__GFX10CORE = 0x000000d3, - IMG_FMT_ASTC_2D_HDR_8X5__GFX10CORE = 0x000000d4, - IMG_FMT_ASTC_2D_HDR_8X6__GFX10CORE = 0x000000d5, - IMG_FMT_ASTC_2D_HDR_8X8__GFX10CORE = 0x000000d6, - IMG_FMT_ASTC_2D_HDR_10X5__GFX10CORE = 0x000000d7, - IMG_FMT_ASTC_2D_HDR_10X6__GFX10CORE = 0x000000d8, - IMG_FMT_ASTC_2D_HDR_10X8__GFX10CORE = 0x000000d9, - IMG_FMT_ASTC_2D_HDR_10X10__GFX10CORE = 0x000000da, - IMG_FMT_ASTC_2D_HDR_12X10__GFX10CORE = 0x000000db, - IMG_FMT_ASTC_2D_HDR_12X12__GFX10CORE = 0x000000dc, - IMG_FMT_ASTC_2D_LDR_SRGB_4X4__GFX10CORE = 0x000000dd, - IMG_FMT_ASTC_2D_LDR_SRGB_5X4__GFX10CORE = 0x000000de, - IMG_FMT_ASTC_2D_LDR_SRGB_5X5__GFX10CORE = 
0x000000df, - IMG_FMT_ASTC_2D_LDR_SRGB_6X5__GFX10CORE = 0x000000e0, - IMG_FMT_ASTC_2D_LDR_SRGB_6X6__GFX10CORE = 0x000000e1, - IMG_FMT_ASTC_2D_LDR_SRGB_8X5__GFX10CORE = 0x000000e2, - IMG_FMT_ASTC_2D_LDR_SRGB_8X6__GFX10CORE = 0x000000e3, - IMG_FMT_ASTC_2D_LDR_SRGB_8X8__GFX10CORE = 0x000000e4, - IMG_FMT_ASTC_2D_LDR_SRGB_10X5__GFX10CORE = 0x000000e5, - IMG_FMT_ASTC_2D_LDR_SRGB_10X6__GFX10CORE = 0x000000e6, - IMG_FMT_ASTC_2D_LDR_SRGB_10X8__GFX10CORE = 0x000000e7, - IMG_FMT_ASTC_2D_LDR_SRGB_10X10__GFX10CORE = 0x000000e8, - IMG_FMT_ASTC_2D_LDR_SRGB_12X10__GFX10CORE = 0x000000e9, - IMG_FMT_ASTC_2D_LDR_SRGB_12X12__GFX10CORE = 0x000000ea, - IMG_FMT_ASTC_3D_LDR_3X3X3__GFX10CORE = 0x000000eb, - IMG_FMT_ASTC_3D_LDR_4X3X3__GFX10CORE = 0x000000ec, - IMG_FMT_ASTC_3D_LDR_4X4X3__GFX10CORE = 0x000000ed, - IMG_FMT_ASTC_3D_LDR_4X4X4__GFX10CORE = 0x000000ee, - IMG_FMT_ASTC_3D_LDR_5X4X4__GFX10CORE = 0x000000ef, - IMG_FMT_ASTC_3D_LDR_5X5X4__GFX10CORE = 0x000000f0, - IMG_FMT_ASTC_3D_LDR_5X5X5__GFX10CORE = 0x000000f1, - IMG_FMT_ASTC_3D_LDR_6X5X5__GFX10CORE = 0x000000f2, - IMG_FMT_ASTC_3D_LDR_6X6X5__GFX10CORE = 0x000000f3, - IMG_FMT_ASTC_3D_LDR_6X6X6__GFX10CORE = 0x000000f4, - IMG_FMT_ASTC_3D_HDR_3X3X3__GFX10CORE = 0x000000f5, - IMG_FMT_ASTC_3D_HDR_4X3X3__GFX10CORE = 0x000000f6, - IMG_FMT_ASTC_3D_HDR_4X4X3__GFX10CORE = 0x000000f7, - IMG_FMT_ASTC_3D_HDR_4X4X4__GFX10CORE = 0x000000f8, - IMG_FMT_ASTC_3D_HDR_5X4X4__GFX10CORE = 0x000000f9, - IMG_FMT_ASTC_3D_HDR_5X5X4__GFX10CORE = 0x000000fa, - IMG_FMT_ASTC_3D_HDR_5X5X5__GFX10CORE = 0x000000fb, - IMG_FMT_ASTC_3D_HDR_6X5X5__GFX10CORE = 0x000000fc, - IMG_FMT_ASTC_3D_HDR_6X6X5__GFX10CORE = 0x000000fd, - IMG_FMT_ASTC_3D_HDR_6X6X6__GFX10CORE = 0x000000fe, - IMG_FMT_ASTC_3D_LDR_SRGB_3X3X3__GFX10CORE = 0x000000ff, - IMG_FMT_ASTC_3D_LDR_SRGB_4X3X3__GFX10CORE = 0x00000100, - IMG_FMT_ASTC_3D_LDR_SRGB_4X4X3__GFX10CORE = 0x00000101, - IMG_FMT_ASTC_3D_LDR_SRGB_4X4X4__GFX10CORE = 0x00000102, - IMG_FMT_ASTC_3D_LDR_SRGB_5X4X4__GFX10CORE = 0x00000103, - 
IMG_FMT_ASTC_3D_LDR_SRGB_5X5X4__GFX10CORE = 0x00000104, - IMG_FMT_ASTC_3D_LDR_SRGB_5X5X5__GFX10CORE = 0x00000105, - IMG_FMT_ASTC_3D_LDR_SRGB_6X5X5__GFX10CORE = 0x00000106, - IMG_FMT_ASTC_3D_LDR_SRGB_6X6X5__GFX10CORE = 0x00000107, - IMG_FMT_ASTC_3D_LDR_SRGB_6X6X6__GFX10CORE = 0x00000108, - IMG_FMT_MM_8_UNORM__GFX10CORE = 0x00000109, - IMG_FMT_MM_8_UINT__GFX10CORE = 0x0000010a, - IMG_FMT_MM_8_8_UNORM__GFX10CORE = 0x0000010b, - IMG_FMT_MM_8_8_UINT__GFX10CORE = 0x0000010c, - IMG_FMT_MM_8_8_8_8_UNORM__GFX10CORE = 0x0000010d, - IMG_FMT_MM_8_8_8_8_UINT__GFX10CORE = 0x0000010e, - IMG_FMT_MM_VYUY8_UNORM__GFX10CORE = 0x0000010f, - IMG_FMT_MM_VYUY8_UINT__GFX10CORE = 0x00000110, - IMG_FMT_MM_10_11_11_UNORM__GFX10CORE = 0x00000111, - IMG_FMT_MM_10_11_11_UINT__GFX10CORE = 0x00000112, - IMG_FMT_MM_2_10_10_10_UNORM__GFX10CORE = 0x00000113, - IMG_FMT_MM_2_10_10_10_UINT__GFX10CORE = 0x00000114, - IMG_FMT_MM_16_16_16_16_UNORM__GFX10CORE = 0x00000115, - IMG_FMT_MM_16_16_16_16_UINT__GFX10CORE = 0x00000116, - IMG_FMT_MM_10_IN_16_UNORM__GFX10CORE = 0x00000117, - IMG_FMT_MM_10_IN_16_UINT__GFX10CORE = 0x00000118, - IMG_FMT_MM_10_IN_16_16_UNORM__GFX10CORE = 0x00000119, - IMG_FMT_MM_10_IN_16_16_UINT__GFX10CORE = 0x0000011a, - IMG_FMT_MM_10_IN_16_16_16_16_UNORM__GFX10CORE = 0x0000011b, - IMG_FMT_MM_10_IN_16_16_16_16_UINT__GFX10CORE = 0x0000011c, - IMG_FMT_RESERVED_294__GFX10CORE = 0x00000126, - IMG_FMT_RESERVED_295__GFX10CORE = 0x00000127, - IMG_FMT_RESERVED_296__GFX10CORE = 0x00000128, - IMG_FMT_RESERVED_297__GFX10CORE = 0x00000129, - IMG_FMT_RESERVED_298__GFX10CORE = 0x0000012a, - IMG_FMT_RESERVED_299__GFX10CORE = 0x0000012b, - IMG_FMT_RESERVED_300__GFX10CORE = 0x0000012c, - IMG_FMT_RESERVED_301__GFX10CORE = 0x0000012d, - IMG_FMT_RESERVED_302__GFX10CORE = 0x0000012e, - IMG_FMT_RESERVED_303__GFX10CORE = 0x0000012f, - IMG_FMT_RESERVED_304__GFX10CORE = 0x00000130, - IMG_FMT_RESERVED_305__GFX10CORE = 0x00000131, - IMG_FMT_RESERVED_306__GFX10CORE = 0x00000132, - IMG_FMT_RESERVED_307__GFX10CORE = 
0x00000133, - IMG_FMT_RESERVED_308__GFX10CORE = 0x00000134, - IMG_FMT_RESERVED_309__GFX10CORE = 0x00000135, - IMG_FMT_RESERVED_310__GFX10CORE = 0x00000136, - IMG_FMT_RESERVED_311__GFX10CORE = 0x00000137, - IMG_FMT_RESERVED_312__GFX10CORE = 0x00000138, - IMG_FMT_RESERVED_313__GFX10CORE = 0x00000139, - IMG_FMT_RESERVED_314__GFX10CORE = 0x0000013a, - IMG_FMT_RESERVED_315__GFX10CORE = 0x0000013b, - IMG_FMT_RESERVED_316__GFX10CORE = 0x0000013c, - IMG_FMT_RESERVED_317__GFX10CORE = 0x0000013d, - IMG_FMT_RESERVED_318__GFX10CORE = 0x0000013e, - IMG_FMT_RESERVED_319__GFX10CORE = 0x0000013f, - IMG_FMT_RESERVED_320__GFX10CORE = 0x00000140, - IMG_FMT_RESERVED_321__GFX10CORE = 0x00000141, - IMG_FMT_RESERVED_322__GFX10CORE = 0x00000142, - IMG_FMT_RESERVED_323__GFX10CORE = 0x00000143, - IMG_FMT_RESERVED_324__GFX10CORE = 0x00000144, - IMG_FMT_RESERVED_325__GFX10CORE = 0x00000145, - IMG_FMT_RESERVED_326__GFX10CORE = 0x00000146, - IMG_FMT_RESERVED_327__GFX10CORE = 0x00000147, - IMG_FMT_RESERVED_328__GFX10CORE = 0x00000148, - IMG_FMT_RESERVED_329__GFX10CORE = 0x00000149, - IMG_FMT_RESERVED_330__GFX10CORE = 0x0000014a, - IMG_FMT_RESERVED_331__GFX10CORE = 0x0000014b, - IMG_FMT_RESERVED_332__GFX10CORE = 0x0000014c, - IMG_FMT_RESERVED_333__GFX10CORE = 0x0000014d, - IMG_FMT_RESERVED_334__GFX10CORE = 0x0000014e, - IMG_FMT_RESERVED_335__GFX10CORE = 0x0000014f, - IMG_FMT_RESERVED_336__GFX10CORE = 0x00000150, - IMG_FMT_RESERVED_337__GFX10CORE = 0x00000151, - IMG_FMT_RESERVED_338__GFX10CORE = 0x00000152, - IMG_FMT_RESERVED_339__GFX10CORE = 0x00000153, - IMG_FMT_RESERVED_340__GFX10CORE = 0x00000154, - IMG_FMT_RESERVED_341__GFX10CORE = 0x00000155, - IMG_FMT_RESERVED_342__GFX10CORE = 0x00000156, - IMG_FMT_RESERVED_343__GFX10CORE = 0x00000157, - IMG_FMT_RESERVED_344__GFX10CORE = 0x00000158, - IMG_FMT_RESERVED_345__GFX10CORE = 0x00000159, - IMG_FMT_RESERVED_346__GFX10CORE = 0x0000015a, - IMG_FMT_RESERVED_347__GFX10CORE = 0x0000015b, - IMG_FMT_RESERVED_348__GFX10CORE = 0x0000015c, - 
IMG_FMT_RESERVED_349__GFX10CORE = 0x0000015d, - IMG_FMT_RESERVED_350__GFX10CORE = 0x0000015e, - IMG_FMT_RESERVED_351__GFX10CORE = 0x0000015f, - IMG_FMT_RESERVED_352__GFX10CORE = 0x00000160, - IMG_FMT_RESERVED_353__GFX10CORE = 0x00000161, - IMG_FMT_RESERVED_354__GFX10CORE = 0x00000162, - IMG_FMT_RESERVED_355__GFX10CORE = 0x00000163, - IMG_FMT_RESERVED_356__GFX10CORE = 0x00000164, - IMG_FMT_RESERVED_357__GFX10CORE = 0x00000165, - IMG_FMT_RESERVED_358__GFX10CORE = 0x00000166, - IMG_FMT_RESERVED_359__GFX10CORE = 0x00000167, - IMG_FMT_RESERVED_360__GFX10CORE = 0x00000168, - IMG_FMT_RESERVED_361__GFX10CORE = 0x00000169, - IMG_FMT_RESERVED_362__GFX10CORE = 0x0000016a, - IMG_FMT_RESERVED_363__GFX10CORE = 0x0000016b, - IMG_FMT_RESERVED_364__GFX10CORE = 0x0000016c, - IMG_FMT_RESERVED_365__GFX10CORE = 0x0000016d, - IMG_FMT_RESERVED_366__GFX10CORE = 0x0000016e, - IMG_FMT_RESERVED_367__GFX10CORE = 0x0000016f, - IMG_FMT_RESERVED_368__GFX10CORE = 0x00000170, - IMG_FMT_RESERVED_369__GFX10CORE = 0x00000171, - IMG_FMT_RESERVED_370__GFX10CORE = 0x00000172, - IMG_FMT_RESERVED_371__GFX10CORE = 0x00000173, - IMG_FMT_RESERVED_372__GFX10CORE = 0x00000174, - IMG_FMT_RESERVED_373__GFX10CORE = 0x00000175, - IMG_FMT_RESERVED_374__GFX10CORE = 0x00000176, - IMG_FMT_RESERVED_375__GFX10CORE = 0x00000177, - IMG_FMT_RESERVED_376__GFX10CORE = 0x00000178, - IMG_FMT_RESERVED_377__GFX10CORE = 0x00000179, - IMG_FMT_RESERVED_378__GFX10CORE = 0x0000017a, - IMG_FMT_RESERVED_379__GFX10CORE = 0x0000017b, - IMG_FMT_RESERVED_380__GFX10CORE = 0x0000017c, - IMG_FMT_RESERVED_381__GFX10CORE = 0x0000017d, - IMG_FMT_RESERVED_382__GFX10CORE = 0x0000017e, - IMG_FMT_RESERVED_383__GFX10CORE = 0x0000017f, - IMG_FMT_RESERVED_384__GFX10CORE = 0x00000180, - IMG_FMT_RESERVED_385__GFX10CORE = 0x00000181, - IMG_FMT_RESERVED_386__GFX10CORE = 0x00000182, - IMG_FMT_RESERVED_387__GFX10CORE = 0x00000183, - IMG_FMT_RESERVED_388__GFX10CORE = 0x00000184, - IMG_FMT_RESERVED_389__GFX10CORE = 0x00000185, - IMG_FMT_RESERVED_390__GFX10CORE 
= 0x00000186, - IMG_FMT_RESERVED_391__GFX10CORE = 0x00000187, - IMG_FMT_RESERVED_392__GFX10CORE = 0x00000188, - IMG_FMT_RESERVED_393__GFX10CORE = 0x00000189, - IMG_FMT_RESERVED_394__GFX10CORE = 0x0000018a, - IMG_FMT_RESERVED_395__GFX10CORE = 0x0000018b, - IMG_FMT_RESERVED_396__GFX10CORE = 0x0000018c, - IMG_FMT_RESERVED_397__GFX10CORE = 0x0000018d, - IMG_FMT_RESERVED_398__GFX10CORE = 0x0000018e, - IMG_FMT_RESERVED_399__GFX10CORE = 0x0000018f, - IMG_FMT_RESERVED_400__GFX10CORE = 0x00000190, - IMG_FMT_RESERVED_401__GFX10CORE = 0x00000191, - IMG_FMT_RESERVED_402__GFX10CORE = 0x00000192, - IMG_FMT_RESERVED_403__GFX10CORE = 0x00000193, - IMG_FMT_RESERVED_404__GFX10CORE = 0x00000194, - IMG_FMT_RESERVED_405__GFX10CORE = 0x00000195, - IMG_FMT_RESERVED_406__GFX10CORE = 0x00000196, - IMG_FMT_RESERVED_407__GFX10CORE = 0x00000197, - IMG_FMT_RESERVED_408__GFX10CORE = 0x00000198, - IMG_FMT_RESERVED_409__GFX10CORE = 0x00000199, - IMG_FMT_RESERVED_410__GFX10CORE = 0x0000019a, - IMG_FMT_RESERVED_411__GFX10CORE = 0x0000019b, - IMG_FMT_RESERVED_412__GFX10CORE = 0x0000019c, - IMG_FMT_RESERVED_413__GFX10CORE = 0x0000019d, - IMG_FMT_RESERVED_414__GFX10CORE = 0x0000019e, - IMG_FMT_RESERVED_415__GFX10CORE = 0x0000019f, - IMG_FMT_RESERVED_416__GFX10CORE = 0x000001a0, - IMG_FMT_RESERVED_417__GFX10CORE = 0x000001a1, - IMG_FMT_RESERVED_418__GFX10CORE = 0x000001a2, - IMG_FMT_RESERVED_419__GFX10CORE = 0x000001a3, - IMG_FMT_RESERVED_420__GFX10CORE = 0x000001a4, - IMG_FMT_RESERVED_421__GFX10CORE = 0x000001a5, - IMG_FMT_RESERVED_422__GFX10CORE = 0x000001a6, - IMG_FMT_RESERVED_423__GFX10CORE = 0x000001a7, - IMG_FMT_RESERVED_424__GFX10CORE = 0x000001a8, - IMG_FMT_RESERVED_425__GFX10CORE = 0x000001a9, - IMG_FMT_RESERVED_426__GFX10CORE = 0x000001aa, - IMG_FMT_RESERVED_427__GFX10CORE = 0x000001ab, - IMG_FMT_RESERVED_428__GFX10CORE = 0x000001ac, - IMG_FMT_RESERVED_429__GFX10CORE = 0x000001ad, - IMG_FMT_RESERVED_430__GFX10CORE = 0x000001ae, - IMG_FMT_RESERVED_431__GFX10CORE = 0x000001af, - 
IMG_FMT_RESERVED_432__GFX10CORE = 0x000001b0, - IMG_FMT_RESERVED_433__GFX10CORE = 0x000001b1, - IMG_FMT_RESERVED_434__GFX10CORE = 0x000001b2, - IMG_FMT_RESERVED_435__GFX10CORE = 0x000001b3, - IMG_FMT_RESERVED_436__GFX10CORE = 0x000001b4, - IMG_FMT_RESERVED_437__GFX10CORE = 0x000001b5, - IMG_FMT_RESERVED_438__GFX10CORE = 0x000001b6, - IMG_FMT_RESERVED_439__GFX10CORE = 0x000001b7, - IMG_FMT_RESERVED_440__GFX10CORE = 0x000001b8, - IMG_FMT_RESERVED_441__GFX10CORE = 0x000001b9, - IMG_FMT_RESERVED_442__GFX10CORE = 0x000001ba, - IMG_FMT_RESERVED_443__GFX10CORE = 0x000001bb, - IMG_FMT_RESERVED_444__GFX10CORE = 0x000001bc, - IMG_FMT_RESERVED_445__GFX10CORE = 0x000001bd, - IMG_FMT_RESERVED_446__GFX10CORE = 0x000001be, - IMG_FMT_RESERVED_447__GFX10CORE = 0x000001bf, - IMG_FMT_RESERVED_448__GFX10CORE = 0x000001c0, - IMG_FMT_RESERVED_449__GFX10CORE = 0x000001c1, - IMG_FMT_RESERVED_450__GFX10CORE = 0x000001c2, - IMG_FMT_RESERVED_451__GFX10CORE = 0x000001c3, - IMG_FMT_RESERVED_452__GFX10CORE = 0x000001c4, - IMG_FMT_RESERVED_453__GFX10CORE = 0x000001c5, - IMG_FMT_RESERVED_454__GFX10CORE = 0x000001c6, - IMG_FMT_RESERVED_455__GFX10CORE = 0x000001c7, - IMG_FMT_RESERVED_456__GFX10CORE = 0x000001c8, - IMG_FMT_RESERVED_457__GFX10CORE = 0x000001c9, - IMG_FMT_RESERVED_458__GFX10CORE = 0x000001ca, - IMG_FMT_RESERVED_459__GFX10CORE = 0x000001cb, - IMG_FMT_RESERVED_460__GFX10CORE = 0x000001cc, - IMG_FMT_RESERVED_461__GFX10CORE = 0x000001cd, - IMG_FMT_RESERVED_462__GFX10CORE = 0x000001ce, - IMG_FMT_RESERVED_463__GFX10CORE = 0x000001cf, - IMG_FMT_RESERVED_464__GFX10CORE = 0x000001d0, - IMG_FMT_RESERVED_465__GFX10CORE = 0x000001d1, - IMG_FMT_RESERVED_466__GFX10CORE = 0x000001d2, - IMG_FMT_RESERVED_467__GFX10CORE = 0x000001d3, - IMG_FMT_RESERVED_468__GFX10CORE = 0x000001d4, - IMG_FMT_RESERVED_469__GFX10CORE = 0x000001d5, - IMG_FMT_RESERVED_470__GFX10CORE = 0x000001d6, - IMG_FMT_RESERVED_471__GFX10CORE = 0x000001d7, - IMG_FMT_RESERVED_472__GFX10CORE = 0x000001d8, - IMG_FMT_RESERVED_473__GFX10CORE 
= 0x000001d9, - IMG_FMT_RESERVED_474__GFX10CORE = 0x000001da, - IMG_FMT_RESERVED_475__GFX10CORE = 0x000001db, - IMG_FMT_RESERVED_476__GFX10CORE = 0x000001dc, - IMG_FMT_RESERVED_477__GFX10CORE = 0x000001dd, - IMG_FMT_RESERVED_478__GFX10CORE = 0x000001de, - IMG_FMT_RESERVED_479__GFX10CORE = 0x000001df, - IMG_FMT_RESERVED_480__GFX10CORE = 0x000001e0, - IMG_FMT_RESERVED_481__GFX10CORE = 0x000001e1, - IMG_FMT_RESERVED_482__GFX10CORE = 0x000001e2, - IMG_FMT_RESERVED_483__GFX10CORE = 0x000001e3, - IMG_FMT_RESERVED_484__GFX10CORE = 0x000001e4, - IMG_FMT_RESERVED_485__GFX10CORE = 0x000001e5, - IMG_FMT_RESERVED_486__GFX10CORE = 0x000001e6, - IMG_FMT_RESERVED_487__GFX10CORE = 0x000001e7, - IMG_FMT_RESERVED_488__GFX10CORE = 0x000001e8, - IMG_FMT_RESERVED_489__GFX10CORE = 0x000001e9, - IMG_FMT_RESERVED_490__GFX10CORE = 0x000001ea, - IMG_FMT_RESERVED_491__GFX10CORE = 0x000001eb, - IMG_FMT_RESERVED_492__GFX10CORE = 0x000001ec, - IMG_FMT_RESERVED_493__GFX10CORE = 0x000001ed, - IMG_FMT_RESERVED_494__GFX10CORE = 0x000001ee, - IMG_FMT_RESERVED_495__GFX10CORE = 0x000001ef, - IMG_FMT_RESERVED_496__GFX10CORE = 0x000001f0, - IMG_FMT_RESERVED_497__GFX10CORE = 0x000001f1, - IMG_FMT_RESERVED_498__GFX10CORE = 0x000001f2, - IMG_FMT_RESERVED_499__GFX10CORE = 0x000001f3, - IMG_FMT_RESERVED_500__GFX10CORE = 0x000001f4, - IMG_FMT_RESERVED_501__GFX10CORE = 0x000001f5, - IMG_FMT_RESERVED_502__GFX10CORE = 0x000001f6, - IMG_FMT_RESERVED_503__GFX10CORE = 0x000001f7, - IMG_FMT_RESERVED_504__GFX10CORE = 0x000001f8, - IMG_FMT_RESERVED_505__GFX10CORE = 0x000001f9, - IMG_FMT_RESERVED_506__GFX10CORE = 0x000001fa, - IMG_FMT_RESERVED_507__GFX10CORE = 0x000001fb, - IMG_FMT_RESERVED_508__GFX10CORE = 0x000001fc, - IMG_FMT_RESERVED_509__GFX10CORE = 0x000001fd, - IMG_FMT_RESERVED_510__GFX10CORE = 0x000001fe, - IMG_FMT_RESERVED_511__GFX10CORE = 0x000001ff, - IMG_FMT_RESERVED_96__GFX10COREPLUS = 0x00000060, - IMG_FMT_RESERVED_97__GFX10COREPLUS = 0x00000061, - IMG_FMT_RESERVED_123__GFX10COREPLUS = 0x0000007b, - 
IMG_FMT_RESERVED_124__GFX10COREPLUS = 0x0000007c, - IMG_FMT_RESERVED_125__GFX10COREPLUS = 0x0000007d, - IMG_FMT_RESERVED_126__GFX10COREPLUS = 0x0000007e, - IMG_FMT_RESERVED_127__GFX10COREPLUS = 0x0000007f, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - IMG_FMT_MM_10_IN_16_UNORM__GFX11 = 0x0000005a, - IMG_FMT_MM_10_IN_16_UINT__GFX11 = 0x0000005b, - IMG_FMT_MM_10_IN_16_16_UNORM__GFX11 = 0x0000005c, - IMG_FMT_MM_10_IN_16_16_UINT__GFX11 = 0x0000005d, - IMG_FMT_MM_10_IN_16_16_16_16_UNORM__GFX11 = 0x0000005e, - IMG_FMT_MM_10_IN_16_16_16_16_UINT__GFX11 = 0x0000005f, - IMG_FMT_BC4_UNORM__GFX11 = 0x00000073, - IMG_FMT_BC4_SNORM__GFX11 = 0x00000074, - IMG_FMT_BC5_UNORM__GFX11 = 0x00000075, - IMG_FMT_BC5_SNORM__GFX11 = 0x00000076, - IMG_FMT_BC6_UFLOAT__GFX11 = 0x00000077, - IMG_FMT_BC6_SFLOAT__GFX11 = 0x00000078, - IMG_FMT_BC7_UNORM__GFX11 = 0x00000079, - IMG_FMT_BC7_SRGB__GFX11 = 0x0000007a, - IMG_FMT_RESERVED_128__GFX11 = 0x00000080, - IMG_FMT_RESERVED_129__GFX11 = 0x00000081, - IMG_FMT_RESERVED_130__GFX11 = 0x00000082, - IMG_FMT_RESERVED_131__GFX11 = 0x00000083, - IMG_FMT_RESERVED_132__GFX11 = 0x00000084, - IMG_FMT_RESERVED_133__GFX11 = 0x00000085, - IMG_FMT_RESERVED_134__GFX11 = 0x00000086, - IMG_FMT_RESERVED_135__GFX11 = 0x00000087, - IMG_FMT_RESERVED_136__GFX11 = 0x00000088, - IMG_FMT_RESERVED_137__GFX11 = 0x00000089, - IMG_FMT_RESERVED_138__GFX11 = 0x0000008a, - IMG_FMT_RESERVED_139__GFX11 = 0x0000008b, - IMG_FMT_RESERVED_140__GFX11 = 0x0000008c, - IMG_FMT_RESERVED_141__GFX11 = 0x0000008d, - IMG_FMT_RESERVED_142__GFX11 = 0x0000008e, - IMG_FMT_RESERVED_143__GFX11 = 0x0000008f, - IMG_FMT_RESERVED_144__GFX11 = 0x00000090, - IMG_FMT_RESERVED_145__GFX11 = 0x00000091, - IMG_FMT_RESERVED_146__GFX11 = 0x00000092, - IMG_FMT_RESERVED_161__GFX11 = 0x000000a1, - IMG_FMT_RESERVED_162__GFX11 = 0x000000a2, - IMG_FMT_RESERVED_163__GFX11 = 0x000000a3, - IMG_FMT_RESERVED_164__GFX11 = 0x000000a4, - IMG_FMT_RESERVED_165__GFX11 = 0x000000a5, - 
IMG_FMT_RESERVED_166__GFX11 = 0x000000a6, - IMG_FMT_RESERVED_167__GFX11 = 0x000000a7, - IMG_FMT_RESERVED_168__GFX11 = 0x000000a8, - IMG_FMT_RESERVED_169__GFX11 = 0x000000a9, - IMG_FMT_RESERVED_170__GFX11 = 0x000000aa, - IMG_FMT_RESERVED_171__GFX11 = 0x000000ab, - IMG_FMT_RESERVED_172__GFX11 = 0x000000ac, - IMG_FMT_RESERVED_173__GFX11 = 0x000000ad, - IMG_FMT_RESERVED_174__GFX11 = 0x000000ae, - IMG_FMT_MM_8_8_8_8_UNORM__GFX11 = 0x000000d3, - IMG_FMT_MM_8_8_8_8_UINT__GFX11 = 0x000000d4, - IMG_FMT_MM_VYUY8_UNORM__GFX11 = 0x000000d5, - IMG_FMT_MM_VYUY8_UINT__GFX11 = 0x000000d6, - IMG_FMT_MM_10_11_11_UNORM__GFX11 = 0x000000d7, - IMG_FMT_MM_10_11_11_UINT__GFX11 = 0x000000d8, - IMG_FMT_MM_2_10_10_10_UNORM__GFX11 = 0x000000d9, - IMG_FMT_MM_2_10_10_10_UINT__GFX11 = 0x000000da, - IMG_FMT_MM_16_16_16_16_UNORM__GFX11 = 0x000000db, - IMG_FMT_MM_16_16_16_16_UINT__GFX11 = 0x000000dc, - IMG_FMT_MM_12_IN_16_UNORM__GFX11 = 0x000000dd, - IMG_FMT_MM_12_IN_16_UINT__GFX11 = 0x000000de, - IMG_FMT_MM_12_IN_16_16_UNORM__GFX11 = 0x000000df, - IMG_FMT_MM_12_IN_16_16_UINT__GFX11 = 0x000000e0, - IMG_FMT_MM_12_IN_16_16_16_16_UNORM__GFX11 = 0x000000e1, - IMG_FMT_MM_12_IN_16_16_16_16_UINT__GFX11 = 0x000000e2, - IMG_FMT_6E4_FLOAT__GFX11 = 0x000000e3, - IMG_FMT_7E3_FLOAT__GFX11 = 0x000000e4, -#endif -#if CHIP_HDR_NAVI21 - IMG_FMT_MM_12_IN_16_UNORM__NV21 = 0x00000120, - IMG_FMT_MM_12_IN_16_UINT__NV21 = 0x00000121, - IMG_FMT_MM_12_IN_16_16_UNORM__NV21 = 0x00000122, - IMG_FMT_MM_12_IN_16_16_UINT__NV21 = 0x00000123, - IMG_FMT_MM_12_IN_16_16_16_16_UNORM__NV21 = 0x00000124, - IMG_FMT_MM_12_IN_16_16_16_16_UINT__NV21 = 0x00000125, -#endif -#if CHIP_HDR_NAVI22 - IMG_FMT_MM_12_IN_16_UNORM__NV22 = 0x00000120, - IMG_FMT_MM_12_IN_16_UINT__NV22 = 0x00000121, - IMG_FMT_MM_12_IN_16_16_UNORM__NV22 = 0x00000122, - IMG_FMT_MM_12_IN_16_16_UINT__NV22 = 0x00000123, - IMG_FMT_MM_12_IN_16_16_16_16_UNORM__NV22 = 0x00000124, - IMG_FMT_MM_12_IN_16_16_16_16_UINT__NV22 = 0x00000125, -#endif -#if CHIP_HDR_NAVI23 - 
IMG_FMT_MM_12_IN_16_UNORM__NV23 = 0x00000120, - IMG_FMT_MM_12_IN_16_UINT__NV23 = 0x00000121, - IMG_FMT_MM_12_IN_16_16_UNORM__NV23 = 0x00000122, - IMG_FMT_MM_12_IN_16_16_UINT__NV23 = 0x00000123, - IMG_FMT_MM_12_IN_16_16_16_16_UNORM__NV23 = 0x00000124, - IMG_FMT_MM_12_IN_16_16_16_16_UINT__NV23 = 0x00000125, -#endif -#if CHIP_HDR_NAVI24 - IMG_FMT_MM_12_IN_16_UNORM__NV24 = 0x00000120, - IMG_FMT_MM_12_IN_16_UINT__NV24 = 0x00000121, - IMG_FMT_MM_12_IN_16_16_UNORM__NV24 = 0x00000122, - IMG_FMT_MM_12_IN_16_16_UINT__NV24 = 0x00000123, - IMG_FMT_MM_12_IN_16_16_16_16_UNORM__NV24 = 0x00000124, - IMG_FMT_MM_12_IN_16_16_16_16_UINT__NV24 = 0x00000125, -#endif -#if CHIP_HDR_NAVI32 - IMG_FMT_FMASK8_S2_F2__NV32 = 0x00000060, - IMG_FMT_FMASK8_S4_F4__NV32 = 0x00000061, - IMG_FMT_FMASK8_S2_F1__NV32 = 0x00000062, - IMG_FMT_FMASK8_S4_F1__NV32 = 0x00000063, - IMG_FMT_FMASK8_S8_F1__NV32 = 0x00000064, - IMG_FMT_FMASK8_S4_F2__NV32 = 0x00000065, - IMG_FMT_FMASK16_S16_F1__NV32 = 0x00000066, - IMG_FMT_FMASK16_S8_F2__NV32 = 0x00000067, - IMG_FMT_FMASK32_S16_F2__NV32 = 0x00000068, - IMG_FMT_FMASK32_S8_F4__NV32 = 0x00000069, - IMG_FMT_FMASK64_S16_F4__NV32 = 0x0000006a, - IMG_FMT_FMASK64_S16_F8__NV32 = 0x0000006b, - IMG_FMT_FMASK32_S8_F8__NV32 = 0x0000006c, -#endif -#if CHIP_HDR_NAVI33 - IMG_FMT_FMASK8_S2_F2__NV33 = 0x00000060, - IMG_FMT_FMASK8_S4_F4__NV33 = 0x00000061, - IMG_FMT_FMASK8_S2_F1__NV33 = 0x00000062, - IMG_FMT_FMASK8_S4_F1__NV33 = 0x00000063, - IMG_FMT_FMASK8_S8_F1__NV33 = 0x00000064, - IMG_FMT_FMASK8_S4_F2__NV33 = 0x00000065, - IMG_FMT_FMASK16_S16_F1__NV33 = 0x00000066, - IMG_FMT_FMASK16_S8_F2__NV33 = 0x00000067, - IMG_FMT_FMASK32_S16_F2__NV33 = 0x00000068, - IMG_FMT_FMASK32_S8_F4__NV33 = 0x00000069, - IMG_FMT_FMASK64_S16_F4__NV33 = 0x0000006a, - IMG_FMT_FMASK64_S16_F8__NV33 = 0x0000006b, - IMG_FMT_FMASK32_S8_F8__NV33 = 0x0000006c, -#endif - IMG_FMT_RESERVED_288__RAPHAEL = 0x00000120, - IMG_FMT_RESERVED_289__RAPHAEL = 0x00000121, - IMG_FMT_RESERVED_290__RAPHAEL = 0x00000122, - 
IMG_FMT_RESERVED_291__RAPHAEL = 0x00000123, - IMG_FMT_RESERVED_292__RAPHAEL = 0x00000124, - IMG_FMT_RESERVED_293__RAPHAEL = 0x00000125, - IMG_FMT_RESERVED_288__REMBRANDT = 0x00000120, - IMG_FMT_RESERVED_289__REMBRANDT = 0x00000121, - IMG_FMT_RESERVED_290__REMBRANDT = 0x00000122, - IMG_FMT_RESERVED_291__REMBRANDT = 0x00000123, - IMG_FMT_RESERVED_292__REMBRANDT = 0x00000124, - IMG_FMT_RESERVED_293__REMBRANDT = 0x00000125, -} IMG_FMT; - -typedef enum IMG_NUM_FORMAT { - IMG_NUM_FORMAT_UNORM = 0x00000000, - IMG_NUM_FORMAT_SNORM = 0x00000001, - IMG_NUM_FORMAT_USCALED = 0x00000002, - IMG_NUM_FORMAT_SSCALED = 0x00000003, - IMG_NUM_FORMAT_UINT = 0x00000004, - IMG_NUM_FORMAT_SINT = 0x00000005, - IMG_NUM_FORMAT_FLOAT = 0x00000007, - IMG_NUM_FORMAT_RESERVED_6__GFX09 = 0x00000006, - IMG_NUM_FORMAT_SRGB__GFX09 = 0x00000009, - IMG_NUM_FORMAT_UNORM_UINT__GFX09 = 0x0000000a, - IMG_NUM_FORMAT_RESERVED_11__GFX09 = 0x0000000b, - IMG_NUM_FORMAT_RESERVED_12__GFX09 = 0x0000000c, - IMG_NUM_FORMAT_RESERVED_13__GFX09 = 0x0000000d, - IMG_NUM_FORMAT_RESERVED_8__GFX09_10 = 0x00000008, - IMG_NUM_FORMAT_RESERVED_14__GFX09_10 = 0x0000000e, - IMG_NUM_FORMAT_RESERVED_15__GFX09_10 = 0x0000000f, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - IMG_NUM_FORMAT_SRGB__GFX104PLUS = 0x00000006, -#endif - IMG_NUM_FORMAT_SNORM_NZ__GFX10CORE = 0x00000006, - IMG_NUM_FORMAT_SRGB__GFX10CORE = 0x00000009, - IMG_NUM_FORMAT_UBNORM__GFX10CORE = 0x0000000a, - IMG_NUM_FORMAT_UBNORM_NZ__GFX10CORE = 0x0000000b, - IMG_NUM_FORMAT_UBINT__GFX10CORE = 0x0000000c, - IMG_NUM_FORMAT_UBSCALED__GFX10CORE = 0x0000000d, -} IMG_NUM_FORMAT; - -typedef enum IMG_NUM_FORMAT_ASTC_2D { - IMG_NUM_FORMAT_ASTC_2D_RESERVED_14 = 0x0000000e, - IMG_NUM_FORMAT_ASTC_2D_RESERVED_15 = 0x0000000f, - IMG_NUM_FORMAT_ASTC_2D_4x4__GFX09 = 0x00000000, - IMG_NUM_FORMAT_ASTC_2D_5x4__GFX09 = 0x00000001, - IMG_NUM_FORMAT_ASTC_2D_5x5__GFX09 = 0x00000002, - IMG_NUM_FORMAT_ASTC_2D_6x5__GFX09 = 0x00000003, - 
IMG_NUM_FORMAT_ASTC_2D_6x6__GFX09 = 0x00000004, - IMG_NUM_FORMAT_ASTC_2D_8x5__GFX09 = 0x00000005, - IMG_NUM_FORMAT_ASTC_2D_8x6__GFX09 = 0x00000006, - IMG_NUM_FORMAT_ASTC_2D_8x8__GFX09 = 0x00000007, - IMG_NUM_FORMAT_ASTC_2D_10x5__GFX09 = 0x00000008, - IMG_NUM_FORMAT_ASTC_2D_10x6__GFX09 = 0x00000009, - IMG_NUM_FORMAT_ASTC_2D_10x8__GFX09 = 0x0000000a, - IMG_NUM_FORMAT_ASTC_2D_10x10__GFX09 = 0x0000000b, - IMG_NUM_FORMAT_ASTC_2D_12x10__GFX09 = 0x0000000c, - IMG_NUM_FORMAT_ASTC_2D_12x12__GFX09 = 0x0000000d, - IMG_NUM_FORMAT_ASTC_2D_4X4__GFX10 = 0x00000000, - IMG_NUM_FORMAT_ASTC_2D_5X4__GFX10 = 0x00000001, - IMG_NUM_FORMAT_ASTC_2D_5X5__GFX10 = 0x00000002, - IMG_NUM_FORMAT_ASTC_2D_6X5__GFX10 = 0x00000003, - IMG_NUM_FORMAT_ASTC_2D_6X6__GFX10 = 0x00000004, - IMG_NUM_FORMAT_ASTC_2D_8X5__GFX10 = 0x00000005, - IMG_NUM_FORMAT_ASTC_2D_8X6__GFX10 = 0x00000006, - IMG_NUM_FORMAT_ASTC_2D_8X8__GFX10 = 0x00000007, - IMG_NUM_FORMAT_ASTC_2D_10X5__GFX10 = 0x00000008, - IMG_NUM_FORMAT_ASTC_2D_10X6__GFX10 = 0x00000009, - IMG_NUM_FORMAT_ASTC_2D_10X8__GFX10 = 0x0000000a, - IMG_NUM_FORMAT_ASTC_2D_10X10__GFX10 = 0x0000000b, - IMG_NUM_FORMAT_ASTC_2D_12X10__GFX10 = 0x0000000c, - IMG_NUM_FORMAT_ASTC_2D_12X12__GFX10 = 0x0000000d, -} IMG_NUM_FORMAT_ASTC_2D; - -typedef enum IMG_NUM_FORMAT_ASTC_3D { - IMG_NUM_FORMAT_ASTC_3D_3x3x3__GFX09 = 0x00000000, - IMG_NUM_FORMAT_ASTC_3D_4x3x3__GFX09 = 0x00000001, - IMG_NUM_FORMAT_ASTC_3D_4x4x3__GFX09 = 0x00000002, - IMG_NUM_FORMAT_ASTC_3D_4x4x4__GFX09 = 0x00000003, - IMG_NUM_FORMAT_ASTC_3D_5x4x4__GFX09 = 0x00000004, - IMG_NUM_FORMAT_ASTC_3D_5x5x4__GFX09 = 0x00000005, - IMG_NUM_FORMAT_ASTC_3D_5x5x5__GFX09 = 0x00000006, - IMG_NUM_FORMAT_ASTC_3D_6x5x5__GFX09 = 0x00000007, - IMG_NUM_FORMAT_ASTC_3D_6x6x5__GFX09 = 0x00000008, - IMG_NUM_FORMAT_ASTC_3D_6x6x6__GFX09 = 0x00000009, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_10__GFX09 = 0x0000000a, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_11__GFX09 = 0x0000000b, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_12__GFX09 = 0x0000000c, - 
IMG_NUM_FORMAT_ASTC_3D_RESERVED_13__GFX09 = 0x0000000d, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_14__GFX09 = 0x0000000e, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_15__GFX09 = 0x0000000f, - IMG_NUM_FORMAT_ASTC_3D_3X3X3__GFX10CORE = 0x00000000, - IMG_NUM_FORMAT_ASTC_3D_4X3X3__GFX10CORE = 0x00000001, - IMG_NUM_FORMAT_ASTC_3D_4X4X3__GFX10CORE = 0x00000002, - IMG_NUM_FORMAT_ASTC_3D_4X4X4__GFX10CORE = 0x00000003, - IMG_NUM_FORMAT_ASTC_3D_5X4X4__GFX10CORE = 0x00000004, - IMG_NUM_FORMAT_ASTC_3D_5X5X4__GFX10CORE = 0x00000005, - IMG_NUM_FORMAT_ASTC_3D_5X5X5__GFX10CORE = 0x00000006, - IMG_NUM_FORMAT_ASTC_3D_6X5X5__GFX10CORE = 0x00000007, - IMG_NUM_FORMAT_ASTC_3D_6X6X5__GFX10CORE = 0x00000008, - IMG_NUM_FORMAT_ASTC_3D_6X6X6__GFX10CORE = 0x00000009, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_10__GFX10CORE = 0x0000000a, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_11__GFX10CORE = 0x0000000b, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_12__GFX10CORE = 0x0000000c, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_13__GFX10CORE = 0x0000000d, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_14__GFX10CORE = 0x0000000e, - IMG_NUM_FORMAT_ASTC_3D_RESERVED_15__GFX10CORE = 0x0000000f, -} IMG_NUM_FORMAT_ASTC_3D; - -typedef enum IMG_NUM_FORMAT_FMASK { - IMG_NUM_FORMAT_FMASK_16_16_1 = 0x00000006, - IMG_NUM_FORMAT_FMASK_16_8_2 = 0x00000007, - IMG_NUM_FORMAT_FMASK_32_16_2 = 0x00000008, - IMG_NUM_FORMAT_FMASK_32_8_4 = 0x00000009, - IMG_NUM_FORMAT_FMASK_RESERVED_13 = 0x0000000d, - IMG_NUM_FORMAT_FMASK_RESERVED_14 = 0x0000000e, - IMG_NUM_FORMAT_FMASK_RESERVED_15 = 0x0000000f, - IMG_NUM_FORMAT_FMASK_8_2_1__CORE = 0x00000000, - IMG_NUM_FORMAT_FMASK_8_4_1__CORE = 0x00000001, - IMG_NUM_FORMAT_FMASK_8_8_1__CORE = 0x00000002, - IMG_NUM_FORMAT_FMASK_8_2_2__CORE = 0x00000003, - IMG_NUM_FORMAT_FMASK_8_4_2__CORE = 0x00000004, - IMG_NUM_FORMAT_FMASK_8_4_4__CORE = 0x00000005, - IMG_NUM_FORMAT_FMASK_32_8_8__CORE = 0x0000000a, - IMG_NUM_FORMAT_FMASK_64_16_4__CORE = 0x0000000b, - IMG_NUM_FORMAT_FMASK_64_16_8__CORE = 0x0000000c, -} IMG_NUM_FORMAT_FMASK; - -typedef enum 
IMG_NUM_FORMAT_N_IN_16 { - IMG_NUM_FORMAT_N_IN_16_RESERVED_0 = 0x00000000, - IMG_NUM_FORMAT_N_IN_16_UNORM_10 = 0x00000001, - IMG_NUM_FORMAT_N_IN_16_UNORM_9 = 0x00000002, - IMG_NUM_FORMAT_N_IN_16_RESERVED_3 = 0x00000003, - IMG_NUM_FORMAT_N_IN_16_UINT_10 = 0x00000004, - IMG_NUM_FORMAT_N_IN_16_UINT_9 = 0x00000005, - IMG_NUM_FORMAT_N_IN_16_RESERVED_6 = 0x00000006, - IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_10 = 0x00000007, - IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_9 = 0x00000008, - IMG_NUM_FORMAT_N_IN_16_RESERVED_9 = 0x00000009, - IMG_NUM_FORMAT_N_IN_16_RESERVED_10 = 0x0000000a, - IMG_NUM_FORMAT_N_IN_16_RESERVED_11 = 0x0000000b, - IMG_NUM_FORMAT_N_IN_16_RESERVED_12 = 0x0000000c, - IMG_NUM_FORMAT_N_IN_16_RESERVED_13 = 0x0000000d, - IMG_NUM_FORMAT_N_IN_16_RESERVED_14 = 0x0000000e, - IMG_NUM_FORMAT_N_IN_16_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_N_IN_16; - -typedef enum MTYPE { - MTYPE_UC = 0x00000003, - MTYPE_NC__GFX09 = 0x00000000, - MTYPE_WC__GFX09 = 0x00000001, - MTYPE_CC__GFX09 = 0x00000002, - MTYPE_RESERVED_1__GFX10COREPLUS = 0x00000001, - MTYPE_C_RO_S__GFX10COREPLUS = 0x00000002, - MTYPE_C_RW_S__GFX10COREPLUS = 0x00000004, - MTYPE_RESERVED_5__GFX10COREPLUS = 0x00000005, - MTYPE_C_RO_US__GFX10COREPLUS = 0x00000006, - MTYPE_RESERVED_7__GFX10COREPLUS = 0x00000007, - MTYPE_C_RW_US__GFX10PLUS = 0x00000000, -} MTYPE; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum OreoMode { - OMODE_BLEND = 0x00000000, - OMODE_O_THEN_B = 0x00000001, - OMODE_P_THEN_O_THEN_B = 0x00000002, - OMODE_RESERVED_3 = 0x00000003, -} OreoMode; -#endif - -typedef enum PerfCounter_Vals { - DB_PERF_SEL_SC_DB_tile_sends = 0x00000000, - DB_PERF_SEL_SC_DB_tile_busy = 0x00000001, - DB_PERF_SEL_SC_DB_tile_stalls = 0x00000002, - DB_PERF_SEL_SC_DB_tile_events = 0x00000003, - DB_PERF_SEL_SC_DB_tile_tiles = 0x00000004, - DB_PERF_SEL_SC_DB_tile_covered = 0x00000005, - DB_PERF_SEL_hiz_tc_read_starved = 0x00000006, - DB_PERF_SEL_hiz_tc_write_stall = 0x00000007, - 
DB_PERF_SEL_DB_SC_tile_sends = 0x0000000a, - DB_PERF_SEL_DB_SC_tile_busy = 0x0000000b, - DB_PERF_SEL_DB_SC_tile_stalls = 0x0000000c, - DB_PERF_SEL_DB_SC_tile_df_stalls = 0x0000000d, - DB_PERF_SEL_DB_SC_tile_tiles = 0x0000000e, - DB_PERF_SEL_DB_SC_tile_culled = 0x0000000f, - DB_PERF_SEL_DB_SC_tile_hier_kill = 0x00000010, - DB_PERF_SEL_DB_SC_tile_fast_ops = 0x00000011, - DB_PERF_SEL_DB_SC_tile_no_ops = 0x00000012, - DB_PERF_SEL_DB_SC_tile_tile_rate = 0x00000013, - DB_PERF_SEL_DB_SC_tile_ssaa_kill = 0x00000014, - DB_PERF_SEL_DB_SC_tile_fast_z_ops = 0x00000015, - DB_PERF_SEL_DB_SC_tile_fast_stencil_ops = 0x00000016, - DB_PERF_SEL_SC_DB_quad_sends = 0x00000017, - DB_PERF_SEL_SC_DB_quad_busy = 0x00000018, - DB_PERF_SEL_SC_DB_quad_squads = 0x00000019, - DB_PERF_SEL_SC_DB_quad_tiles = 0x0000001a, - DB_PERF_SEL_SC_DB_quad_pixels = 0x0000001b, - DB_PERF_SEL_SC_DB_quad_killed_tiles = 0x0000001c, - DB_PERF_SEL_DB_SC_quad_sends = 0x0000001d, - DB_PERF_SEL_DB_SC_quad_busy = 0x0000001e, - DB_PERF_SEL_DB_SC_quad_stalls = 0x0000001f, - DB_PERF_SEL_DB_SC_quad_tiles = 0x00000020, - DB_PERF_SEL_DB_SC_quad_lit_quad = 0x00000021, - DB_PERF_SEL_SX_DB_quad_sends = 0x00000025, - DB_PERF_SEL_SX_DB_quad_busy = 0x00000026, - DB_PERF_SEL_SX_DB_quad_stalls = 0x00000027, - DB_PERF_SEL_SX_DB_quad_quads = 0x00000028, - DB_PERF_SEL_SX_DB_quad_pixels = 0x00000029, - DB_PERF_SEL_SX_DB_quad_exports = 0x0000002a, - DB_PERF_SEL_SH_quads_outstanding_sum = 0x0000002b, - DB_PERF_SEL_tile_rd_sends = 0x00000030, - DB_PERF_SEL_mi_tile_rd_outstanding_sum = 0x00000031, - DB_PERF_SEL_quad_rd_sends = 0x00000032, - DB_PERF_SEL_quad_rd_busy = 0x00000033, - DB_PERF_SEL_quad_rd_mi_stall = 0x00000034, - DB_PERF_SEL_quad_rd_rw_collision = 0x00000035, - DB_PERF_SEL_quad_rd_tag_stall = 0x00000036, - DB_PERF_SEL_quad_rd_32byte_reqs = 0x00000037, - DB_PERF_SEL_quad_rd_panic = 0x00000038, - DB_PERF_SEL_mi_quad_rd_outstanding_sum = 0x00000039, - DB_PERF_SEL_quad_rdret_sends = 0x0000003a, - DB_PERF_SEL_quad_rdret_busy = 
0x0000003b, - DB_PERF_SEL_tile_wr_sends = 0x0000003c, - DB_PERF_SEL_tile_wr_acks = 0x0000003d, - DB_PERF_SEL_mi_tile_wr_outstanding_sum = 0x0000003e, - DB_PERF_SEL_quad_wr_sends = 0x0000003f, - DB_PERF_SEL_quad_wr_busy = 0x00000040, - DB_PERF_SEL_quad_wr_mi_stall = 0x00000041, - DB_PERF_SEL_quad_wr_coherency_stall = 0x00000042, - DB_PERF_SEL_quad_wr_acks = 0x00000043, - DB_PERF_SEL_mi_quad_wr_outstanding_sum = 0x00000044, - DB_PERF_SEL_Tile_Cache_misses = 0x00000045, - DB_PERF_SEL_Tile_Cache_hits = 0x00000046, - DB_PERF_SEL_Tile_Cache_flushes = 0x00000047, - DB_PERF_SEL_Tile_Cache_surface_stall = 0x00000048, - DB_PERF_SEL_Tile_Cache_starves = 0x00000049, - DB_PERF_SEL_Tile_Cache_mem_return_starve = 0x0000004a, - DB_PERF_SEL_tcp_dispatcher_reads = 0x0000004b, - DB_PERF_SEL_tcp_prefetcher_reads = 0x0000004c, - DB_PERF_SEL_tcp_preloader_reads = 0x0000004d, - DB_PERF_SEL_tcp_dispatcher_flushes = 0x0000004e, - DB_PERF_SEL_tcp_prefetcher_flushes = 0x0000004f, - DB_PERF_SEL_tcp_preloader_flushes = 0x00000050, - DB_PERF_SEL_Depth_Tile_Cache_sends = 0x00000051, - DB_PERF_SEL_Depth_Tile_Cache_busy = 0x00000052, - DB_PERF_SEL_Depth_Tile_Cache_starves = 0x00000053, - DB_PERF_SEL_Depth_Tile_Cache_dtile_locked = 0x00000054, - DB_PERF_SEL_Depth_Tile_Cache_alloc_stall = 0x00000055, - DB_PERF_SEL_Depth_Tile_Cache_misses = 0x00000056, - DB_PERF_SEL_Depth_Tile_Cache_hits = 0x00000057, - DB_PERF_SEL_Depth_Tile_Cache_flushes = 0x00000058, - DB_PERF_SEL_Depth_Tile_Cache_noop_tile = 0x00000059, - DB_PERF_SEL_Depth_Tile_Cache_detailed_noop = 0x0000005a, - DB_PERF_SEL_Depth_Tile_Cache_event = 0x0000005b, - DB_PERF_SEL_Depth_Tile_Cache_tile_frees = 0x0000005c, - DB_PERF_SEL_Depth_Tile_Cache_data_frees = 0x0000005d, - DB_PERF_SEL_Depth_Tile_Cache_mem_return_starve = 0x0000005e, - DB_PERF_SEL_Stencil_Cache_misses = 0x0000005f, - DB_PERF_SEL_Stencil_Cache_hits = 0x00000060, - DB_PERF_SEL_Stencil_Cache_flushes = 0x00000061, - DB_PERF_SEL_Stencil_Cache_starves = 0x00000062, - 
DB_PERF_SEL_Stencil_Cache_frees = 0x00000063, - DB_PERF_SEL_Z_Cache_separate_Z_misses = 0x00000064, - DB_PERF_SEL_Z_Cache_separate_Z_hits = 0x00000065, - DB_PERF_SEL_Z_Cache_separate_Z_flushes = 0x00000066, - DB_PERF_SEL_Z_Cache_separate_Z_starves = 0x00000067, - DB_PERF_SEL_Z_Cache_pmask_misses = 0x00000068, - DB_PERF_SEL_Z_Cache_pmask_hits = 0x00000069, - DB_PERF_SEL_Z_Cache_pmask_flushes = 0x0000006a, - DB_PERF_SEL_Z_Cache_pmask_starves = 0x0000006b, - DB_PERF_SEL_Z_Cache_frees = 0x0000006c, - DB_PERF_SEL_Plane_Cache_misses = 0x0000006d, - DB_PERF_SEL_Plane_Cache_hits = 0x0000006e, - DB_PERF_SEL_Plane_Cache_flushes = 0x0000006f, - DB_PERF_SEL_Plane_Cache_starves = 0x00000070, - DB_PERF_SEL_Plane_Cache_frees = 0x00000071, - DB_PERF_SEL_flush_expanded_stencil = 0x00000072, - DB_PERF_SEL_flush_compressed_stencil = 0x00000073, - DB_PERF_SEL_flush_single_stencil = 0x00000074, - DB_PERF_SEL_planes_flushed = 0x00000075, - DB_PERF_SEL_flush_1plane = 0x00000076, - DB_PERF_SEL_flush_2plane = 0x00000077, - DB_PERF_SEL_flush_3plane = 0x00000078, - DB_PERF_SEL_flush_4plane = 0x00000079, - DB_PERF_SEL_flush_5plane = 0x0000007a, - DB_PERF_SEL_flush_6plane = 0x0000007b, - DB_PERF_SEL_flush_7plane = 0x0000007c, - DB_PERF_SEL_flush_8plane = 0x0000007d, - DB_PERF_SEL_flush_9plane = 0x0000007e, - DB_PERF_SEL_flush_10plane = 0x0000007f, - DB_PERF_SEL_flush_11plane = 0x00000080, - DB_PERF_SEL_flush_12plane = 0x00000081, - DB_PERF_SEL_flush_13plane = 0x00000082, - DB_PERF_SEL_flush_14plane = 0x00000083, - DB_PERF_SEL_flush_15plane = 0x00000084, - DB_PERF_SEL_flush_16plane = 0x00000085, - DB_PERF_SEL_flush_expanded_z = 0x00000086, - DB_PERF_SEL_earlyZ_waiting_for_postZ_done = 0x00000087, - DB_PERF_SEL_reZ_waiting_for_postZ_done = 0x00000088, - DB_PERF_SEL_dk_tile_sends = 0x00000089, - DB_PERF_SEL_dk_tile_busy = 0x0000008a, - DB_PERF_SEL_dk_tile_quad_starves = 0x0000008b, - DB_PERF_SEL_dk_tile_stalls = 0x0000008c, - DB_PERF_SEL_dk_squad_sends = 0x0000008d, - DB_PERF_SEL_dk_squad_busy = 
0x0000008e, - DB_PERF_SEL_dk_squad_stalls = 0x0000008f, - DB_PERF_SEL_Op_Pipe_Busy = 0x00000090, - DB_PERF_SEL_Op_Pipe_MC_Read_stall = 0x00000091, - DB_PERF_SEL_qc_busy = 0x00000092, - DB_PERF_SEL_qc_xfc = 0x00000093, - DB_PERF_SEL_qc_conflicts = 0x00000094, - DB_PERF_SEL_qc_full_stall = 0x00000095, - DB_PERF_SEL_qc_in_preZ_tile_stalls_postZ = 0x00000096, - DB_PERF_SEL_qc_in_postZ_tile_stalls_preZ = 0x00000097, - DB_PERF_SEL_tsc_insert_summarize_stall = 0x00000098, - DB_PERF_SEL_tl_busy = 0x00000099, - DB_PERF_SEL_tl_dtc_read_starved = 0x0000009a, - DB_PERF_SEL_tl_z_fetch_stall = 0x0000009b, - DB_PERF_SEL_tl_stencil_stall = 0x0000009c, - DB_PERF_SEL_tl_z_decompress_stall = 0x0000009d, - DB_PERF_SEL_tl_stencil_locked_stall = 0x0000009e, - DB_PERF_SEL_tl_events = 0x0000009f, - DB_PERF_SEL_tl_summarize_squads = 0x000000a0, - DB_PERF_SEL_tl_flush_expand_squads = 0x000000a1, - DB_PERF_SEL_tl_expand_squads = 0x000000a2, - DB_PERF_SEL_tl_preZ_squads = 0x000000a3, - DB_PERF_SEL_tl_postZ_squads = 0x000000a4, - DB_PERF_SEL_tl_preZ_noop_squads = 0x000000a5, - DB_PERF_SEL_tl_postZ_noop_squads = 0x000000a6, - DB_PERF_SEL_tl_tile_ops = 0x000000a7, - DB_PERF_SEL_tl_in_xfc = 0x000000a8, - DB_PERF_SEL_tl_in_single_stencil_expand_stall = 0x000000a9, - DB_PERF_SEL_tl_in_fast_z_stall = 0x000000aa, - DB_PERF_SEL_tl_out_xfc = 0x000000ab, - DB_PERF_SEL_tl_out_squads = 0x000000ac, - DB_PERF_SEL_zf_plane_multicycle = 0x000000ad, - DB_PERF_SEL_PostZ_Samples_passing_Z = 0x000000ae, - DB_PERF_SEL_PostZ_Samples_failing_Z = 0x000000af, - DB_PERF_SEL_PostZ_Samples_failing_S = 0x000000b0, - DB_PERF_SEL_PreZ_Samples_passing_Z = 0x000000b1, - DB_PERF_SEL_PreZ_Samples_failing_Z = 0x000000b2, - DB_PERF_SEL_PreZ_Samples_failing_S = 0x000000b3, - DB_PERF_SEL_ts_tc_update_stall = 0x000000b4, - DB_PERF_SEL_sc_kick_start = 0x000000b5, - DB_PERF_SEL_sc_kick_end = 0x000000b6, - DB_PERF_SEL_clock_reg_active = 0x000000b7, - DB_PERF_SEL_clock_main_active = 0x000000b8, - DB_PERF_SEL_clock_mem_export_active = 
0x000000b9, - DB_PERF_SEL_esr_ps_out_busy = 0x000000ba, - DB_PERF_SEL_esr_ps_lqf_busy = 0x000000bb, - DB_PERF_SEL_esr_ps_lqf_stall = 0x000000bc, - DB_PERF_SEL_etr_out_send = 0x000000bd, - DB_PERF_SEL_etr_out_busy = 0x000000be, - DB_PERF_SEL_etr_out_ltile_probe_fifo_full_stall = 0x000000bf, - DB_PERF_SEL_etr_out_esr_stall = 0x000000c1, - DB_PERF_SEL_esr_eot_fwd_busy = 0x000000c4, - DB_PERF_SEL_esr_eot_fwd_holding_squad = 0x000000c5, - DB_PERF_SEL_esr_eot_fwd_forward = 0x000000c6, - DB_PERF_SEL_esr_sqq_zi_busy = 0x000000c7, - DB_PERF_SEL_esr_sqq_zi_stall = 0x000000c8, - DB_PERF_SEL_postzl_sq_pt_busy = 0x000000c9, - DB_PERF_SEL_postzl_sq_pt_stall = 0x000000ca, - DB_PERF_SEL_postzl_se_busy = 0x000000cb, - DB_PERF_SEL_postzl_se_stall = 0x000000cc, - DB_PERF_SEL_postzl_partial_launch = 0x000000cd, - DB_PERF_SEL_postzl_full_launch = 0x000000ce, - DB_PERF_SEL_postzl_partial_waiting = 0x000000cf, - DB_PERF_SEL_postzl_tile_mem_stall = 0x000000d0, - DB_PERF_SEL_postzl_tile_init_stall = 0x000000d1, - DB_PERF_SEL_prezl_tile_init_stall = 0x000000d3, - DB_PERF_SEL_dtt_sm_clash_stall = 0x000000d4, - DB_PERF_SEL_dtt_sm_slot_stall = 0x000000d5, - DB_PERF_SEL_dtt_sm_miss_stall = 0x000000d6, - DB_PERF_SEL_mi_rdreq_busy = 0x000000d7, - DB_PERF_SEL_mi_rdreq_stall = 0x000000d8, - DB_PERF_SEL_mi_wrreq_busy = 0x000000d9, - DB_PERF_SEL_mi_wrreq_stall = 0x000000da, - DB_PERF_SEL_recomp_tile_to_1zplane_no_fastop = 0x000000db, - DB_PERF_SEL_dkg_tile_rate_tile = 0x000000dc, - DB_PERF_SEL_prezl_src_in_sends = 0x000000dd, - DB_PERF_SEL_prezl_src_in_stall = 0x000000de, - DB_PERF_SEL_prezl_src_in_squads = 0x000000df, - DB_PERF_SEL_prezl_src_in_squads_unrolled = 0x000000e0, - DB_PERF_SEL_prezl_src_in_tile_rate = 0x000000e1, - DB_PERF_SEL_prezl_src_in_tile_rate_unrolled = 0x000000e2, - DB_PERF_SEL_prezl_src_out_stall = 0x000000e3, - DB_PERF_SEL_postzl_src_in_sends = 0x000000e4, - DB_PERF_SEL_postzl_src_in_stall = 0x000000e5, - DB_PERF_SEL_postzl_src_in_squads = 0x000000e6, - 
DB_PERF_SEL_postzl_src_in_squads_unrolled = 0x000000e7, - DB_PERF_SEL_postzl_src_in_tile_rate = 0x000000e8, - DB_PERF_SEL_postzl_src_in_tile_rate_unrolled = 0x000000e9, - DB_PERF_SEL_postzl_src_out_stall = 0x000000ea, - DB_PERF_SEL_esr_ps_src_in_sends = 0x000000eb, - DB_PERF_SEL_esr_ps_src_in_stall = 0x000000ec, - DB_PERF_SEL_esr_ps_src_in_squads = 0x000000ed, - DB_PERF_SEL_esr_ps_src_in_squads_unrolled = 0x000000ee, - DB_PERF_SEL_esr_ps_src_in_tile_rate = 0x000000ef, - DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled = 0x000000f0, - DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled_to_pixel_rate = 0x000000f1, - DB_PERF_SEL_esr_ps_src_out_stall = 0x000000f2, - DB_PERF_SEL_PreZ_Samples_failing_DB = 0x000000f4, - DB_PERF_SEL_PostZ_Samples_failing_DB = 0x000000f5, - DB_PERF_SEL_flush_compressed = 0x000000f6, - DB_PERF_SEL_flush_plane_le4 = 0x000000f7, - DB_PERF_SEL_tiles_z_fully_summarized = 0x000000f8, - DB_PERF_SEL_tiles_stencil_fully_summarized = 0x000000f9, - DB_PERF_SEL_tiles_z_clear_on_expclear = 0x000000fa, - DB_PERF_SEL_tiles_s_clear_on_expclear = 0x000000fb, - DB_PERF_SEL_tiles_decomp_on_expclear = 0x000000fc, - DB_PERF_SEL_tiles_compressed_to_decompressed = 0x000000fd, - DB_PERF_SEL_Op_Pipe_Prez_Busy = 0x000000fe, - DB_PERF_SEL_Op_Pipe_Postz_Busy = 0x000000ff, - DB_PERF_SEL_di_dt_stall = 0x00000100, - DB_PERF_SEL_hiz_qtiles_culled__GFX09 = 0x00000008, - DB_PERF_SEL_his_qtiles_culled__GFX09 = 0x00000009, - DB_PERF_SEL_esr_ps_sqq_busy__GFX09 = 0x000000c2, - DB_PERF_SEL_esr_ps_sqq_stall__GFX09 = 0x000000c3, - DB_PEFF_SEL_prezl_tile_mem_stall__GFX09 = 0x000000d2, - DB_PERF_SEL_depth_bounds_qtiles_culled__GFX09 = 0x000000f3, - DB_PERF_SEL_DB_SC_quad_double_quad__GFX09 = 0x00000101, - DB_PERF_SEL_SX_DB_quad_export_quads__GFX09 = 0x00000102, - DB_PERF_SEL_SX_DB_quad_double_format__GFX09 = 0x00000103, - DB_PERF_SEL_SX_DB_quad_fast_format__GFX09 = 0x00000104, - DB_PERF_SEL_SX_DB_quad_slow_format__GFX09 = 0x00000105, - DB_PERF_SEL_DB_CB_lquad_export_quads__GFX09 = 0x00000106, 
- DB_PERF_SEL_DB_CB_lquad_double_format__GFX09 = 0x00000107, - DB_PERF_SEL_DB_CB_lquad_fast_format__GFX09 = 0x00000108, - DB_PERF_SEL_DB_CB_lquad_slow_format__GFX09 = 0x00000109, - DB_PERF_SEL_CB_DB_rdreq_sends__GFX09 = 0x0000010a, - DB_PERF_SEL_CB_DB_rdreq_prt_sends__GFX09 = 0x0000010b, - DB_PERF_SEL_CB_DB_wrreq_sends__GFX09 = 0x0000010c, - DB_PERF_SEL_CB_DB_wrreq_prt_sends__GFX09 = 0x0000010d, - DB_PERF_SEL_DB_CB_rdret_ack__GFX09 = 0x0000010e, - DB_PERF_SEL_DB_CB_rdret_nack__GFX09 = 0x0000010f, - DB_PERF_SEL_DB_CB_wrret_ack__GFX09 = 0x00000110, - DB_PERF_SEL_DB_CB_wrret_nack__GFX09 = 0x00000111, - Spare_274__GFX09 = 0x00000112, - Spare_275__GFX09 = 0x00000113, - Spare_276__GFX09 = 0x00000114, - Spare_277__GFX09 = 0x00000115, - Spare_278__GFX09 = 0x00000116, - Spare_279__GFX09 = 0x00000117, - Spare_280__GFX09 = 0x00000118, - Spare_281__GFX09 = 0x00000119, - Spare_282__GFX09 = 0x0000011a, - Spare_283__GFX09 = 0x0000011b, - Spare_284__GFX09 = 0x0000011c, - Spare_285__GFX09 = 0x0000011d, - Spare_286__GFX09 = 0x0000011e, - DB_PERF_SEL_DFSM_prez_killed_squad__GFX09 = 0x0000011f, - DB_PERF_SEL_DFSM_squads_in__GFX09 = 0x00000120, - DB_PERF_SEL_DFSM_full_cleared_squads_out__GFX09 = 0x00000121, - DB_PERF_SEL_DFSM_quads_in__GFX09 = 0x00000122, - DB_PERF_SEL_DFSM_fully_cleared_quads_out__GFX09 = 0x00000123, - DB_PERF_SEL_DFSM_lit_pixels_in__GFX09 = 0x00000124, - DB_PERF_SEL_DFSM_fully_cleared_pixels_out__GFX09 = 0x00000125, - DB_PERF_SEL_DFSM_lit_samples_in__GFX09 = 0x00000126, - DB_PERF_SEL_DFSM_lit_samples_out__GFX09 = 0x00000127, - DB_PERF_SEL_DFSM_cycles_above_watermark__GFX09 = 0x00000128, - DB_PERF_SEL_DFSM_cant_accept_squads_but_not_stalled_by_downstream__GFX09 = 0x00000129, - DB_PERF_SEL_DFSM_stalled_by_downstream__GFX09 = 0x0000012a, - DB_PERF_SEL_DFSM_evicted_squads_above_watermark__GFX09 = 0x0000012b, - DB_PERF_SEL_DFSM_collisions_due_to_POPS_overflow__GFX09 = 0x0000012c, - DB_PERF_SEL_DFSM_collisions_detected_within_POPS_FIFO__GFX09 = 0x0000012d, - 
DB_PERF_SEL_DFSM_evicted_squads_due_to_prim_watermark__GFX09 = 0x0000012e, - DB_PERF_SEL_MI_tile_req_wrack_counter_stall__GFX09 = 0x0000012f, - DB_PERF_SEL_MI_quad_req_wrack_counter_stall__GFX09 = 0x00000130, - DB_PERF_SEL_MI_zpc_req_wrack_counter_stall__GFX09 = 0x00000131, - DB_PERF_SEL_MI_psd_req_wrack_counter_stall__GFX09 = 0x00000132, - DB_PERF_SEL_unmapped_z_tile_culled__GFX09 = 0x00000133, - DB_PERF_SEL_DB_CB_tile_is_event_FLUSH_AND_INV_DB_DATA_TS__GFX09 = 0x00000134, - DB_PERF_SEL_DB_CB_tile_is_event_FLUSH_AND_INV_CB_PIXEL_DATA__GFX09 = 0x00000135, - DB_PERF_SEL_DB_CB_tile_is_event_BOTTOM_OF_PIPE_TS__GFX09 = 0x00000136, - DB_PERF_SEL_DB_CB_tile_waiting_for_perfcounter_stop_event__GFX09 = 0x00000137, - DB_PERF_SEL_DB_CB_lquad_fmt_32bpp_8pix__GFX09 = 0x00000138, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_unsigned_8pix__GFX09 = 0x00000139, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_signed_8pix__GFX09 = 0x0000013a, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_float_8pix__GFX09 = 0x0000013b, - DB_PERF_SEL_DB_CB_lquad_num_pixels_need_blending__GFX09 = 0x0000013c, - DB_PERF_SEL_DB_CB_context_dones__GFX09 = 0x0000013d, - DB_PERF_SEL_DB_CB_eop_dones__GFX09 = 0x0000013e, - DB_PERF_SEL_SX_DB_quad_all_pixels_killed__GFX09 = 0x0000013f, - DB_PERF_SEL_SX_DB_quad_all_pixels_enabled__GFX09 = 0x00000140, - DB_PERF_SEL_SX_DB_quad_need_blending_and_dst_read__GFX09 = 0x00000141, - DB_PERF_SEL_SC_DB_tile_backface__GFX09 = 0x00000142, - DB_PERF_SEL_SC_DB_quad_quads__GFX09 = 0x00000143, - DB_PERF_SEL_DB_SC_quad_quads_with_1_pixel__GFX09 = 0x00000144, - DB_PERF_SEL_DB_SC_quad_quads_with_2_pixels__GFX09 = 0x00000145, - DB_PERF_SEL_DB_SC_quad_quads_with_3_pixels__GFX09 = 0x00000146, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09 = 0x00000147, - DB_PERF_SEL_DB_CB_tile_sends__GFX09_10 = 0x00000022, - DB_PERF_SEL_DB_CB_tile_busy__GFX09_10 = 0x00000023, - DB_PERF_SEL_DB_CB_tile_stalls__GFX09_10 = 0x00000024, - DB_PERF_SEL_DB_CB_lquad_sends__GFX09_10 = 0x0000002c, - DB_PERF_SEL_DB_CB_lquad_busy__GFX09_10 = 
0x0000002d, - DB_PERF_SEL_DB_CB_lquad_stalls__GFX09_10 = 0x0000002e, - DB_PERF_SEL_DB_CB_lquad_quads__GFX09_10 = 0x0000002f, - DB_PERF_SEL_etr_out_cb_tile_stall__GFX09_10 = 0x000000c0, - DB_PERF_SEL_esr_ps_sqq_busy__GFX101 = 0x000000c2, - DB_PERF_SEL_esr_ps_sqq_stall__GFX101 = 0x000000c3, - DB_PERF_SEL_DB_SC_quad_lit_quad_pre_invoke__GFX101 = 0x00000101, - DB_PERF_SEL_MI_tile_req_wrack_counter_stall__GFX101 = 0x0000012f, - DB_PERF_SEL_MI_quad_req_wrack_counter_stall__GFX101 = 0x00000130, - DB_PERF_SEL_MI_zpc_req_wrack_counter_stall__GFX101 = 0x00000131, - DB_PERF_SEL_MI_psd_req_wrack_counter_stall__GFX101 = 0x00000132, - DB_PERF_SEL_DFSM_Flush_flushabit__GFX101 = 0x00000148, - DB_PERF_SEL_DFSM_Flush_flushabit_camcoord_fifo__GFX101 = 0x00000149, - DB_PERF_SEL_DFSM_Flush_flushabit_passthrough__GFX101 = 0x0000014a, - DB_PERF_SEL_DFSM_Flush_flushabit_forceflush__GFX101 = 0x0000014b, - DB_PERF_SEL_DFSM_Flush_flushabit_nearlyfull__GFX101 = 0x0000014c, - DB_PERF_SEL_DFSM_Flush_flushabit_primitivesinflightwatermark__GFX101 = 0x0000014d, - DB_PERF_SEL_DFSM_Flush_flushabit_punch_stalling__GFX101 = 0x0000014e, - DB_PERF_SEL_DFSM_Flush_flushabit_retainedtilefifo_watermark__GFX101 = 0x0000014f, - DB_PERF_SEL_DFSM_Flush_flushabit_tilesinflightwatermark__GFX101 = 0x00000150, - DB_PERF_SEL_DFSM_Flush_flushall__GFX101 = 0x00000151, - DB_PERF_SEL_DFSM_Flush_flushall_dfsmflush__GFX101 = 0x00000152, - DB_PERF_SEL_DFSM_Flush_flushall_opmodechange__GFX101 = 0x00000153, - DB_PERF_SEL_DFSM_Flush_flushall_sampleratechange__GFX101 = 0x00000154, - DB_PERF_SEL_DFSM_Flush_flushall_watchdog__GFX101 = 0x00000155, - DB_PERF_SEL_FG_LOB_FWDR_TIMEOUT_hits__GFX101 = 0x00000171, - DB_PERF_SEL_esr_ps_sqq_busy__GFX103 = 0x000000c2, - DB_PERF_SEL_esr_ps_sqq_stall__GFX103 = 0x000000c3, - DB_PERF_SEL_DB_SC_quad_lit_quad_pre_kill__GFX103 = 0x00000101, - DB_PERF_SEL_MI_tile_req_wrack_counter_stall__GFX103 = 0x0000012f, - DB_PERF_SEL_MI_quad_req_wrack_counter_stall__GFX103 = 0x00000130, - 
DB_PERF_SEL_MI_zpc_req_wrack_counter_stall__GFX103 = 0x00000131, - DB_PERF_SEL_MI_psd_req_wrack_counter_stall__GFX103 = 0x00000132, - DB_PERF_SEL_DFSM_Flush_flushabit__GFX103 = 0x00000148, - DB_PERF_SEL_DFSM_Flush_flushabit_camcoord_fifo__GFX103 = 0x00000149, - DB_PERF_SEL_DFSM_Flush_flushabit_passthrough__GFX103 = 0x0000014a, - DB_PERF_SEL_DFSM_Flush_flushabit_forceflush__GFX103 = 0x0000014b, - DB_PERF_SEL_DFSM_Flush_flushabit_nearlyfull__GFX103 = 0x0000014c, - DB_PERF_SEL_DFSM_Flush_flushabit_primitivesinflightwatermark__GFX103 = 0x0000014d, - DB_PERF_SEL_DFSM_Flush_flushabit_punch_stalling__GFX103 = 0x0000014e, - DB_PERF_SEL_DFSM_Flush_flushabit_retainedtilefifo_watermark__GFX103 = 0x0000014f, - DB_PERF_SEL_DFSM_Flush_flushabit_tilesinflightwatermark__GFX103 = 0x00000150, - DB_PERF_SEL_DFSM_Flush_flushall__GFX103 = 0x00000151, - DB_PERF_SEL_DFSM_Flush_flushall_dfsmflush__GFX103 = 0x00000152, - DB_PERF_SEL_DFSM_Flush_flushall_opmodechange__GFX103 = 0x00000153, - DB_PERF_SEL_DFSM_Flush_flushall_sampleratechange__GFX103 = 0x00000154, - DB_PERF_SEL_DFSM_Flush_flushall_watchdog__GFX103 = 0x00000155, - DB_PERF_SEL_FG_LOB_FWDR_TIMEOUT_hits__GFX103 = 0x00000171, - DB_PERF_SEL_noz_waiting_for_postz_done__GFX103 = 0x00000172, - DB_PERF_SEL_DB_CB_lquad_quads_vrs_rate_1x1__GFX103 = 0x00000173, - DB_PERF_SEL_DB_CB_lquad_quads_vrs_rate_2x1__GFX103 = 0x00000174, - DB_PERF_SEL_DB_CB_lquad_quads_vrs_rate_1x2__GFX103 = 0x00000175, - RMI_rd_tile_32byte_req__GFX103 = 0x00000176, - RMI_rd_z_32byte_req__GFX103 = 0x00000177, - RMI_rd_s_32byte_req__GFX103 = 0x00000178, - RMI_wr_tile_32byte_req__GFX103 = 0x00000179, - RMI_wr_z_32byte_req__GFX103 = 0x0000017a, - RMI_wr_s_32byte_req__GFX103 = 0x0000017b, - RMI_wr_psdzpc_32byte_req__GFX103 = 0x0000017c, - RMI_rd_tile_32byte_ret__GFX103 = 0x0000017d, - RMI_rd_z_32byte_ret__GFX103 = 0x0000017e, - RMI_rd_s_32byte_ret__GFX103 = 0x0000017f, - RMI_wr_tile_32byte_ack__GFX103 = 0x00000180, - RMI_wr_z_32byte_ack__GFX103 = 0x00000181, - 
RMI_wr_s_32byte_ack__GFX103 = 0x00000182, - RMI_wr_psdzpc_32byte_ack__GFX103 = 0x00000183, - DB_PERF_SEL_DB_CB_lquad_quads_vrs_rate_2x2__GFX103 = 0x00000184, - DB_PERF_SEL_prez_ps_invoked_pixel_cnt__GFX103 = 0x00000185, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103 = 0x00000186, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - DB_PERF_SEL_esr_ps_vic_busy__GFX104PLUS = 0x000000c2, - DB_PERF_SEL_esr_ps_vic_stall__GFX104PLUS = 0x000000c3, - DB_PERF_SEL_CB_DB_rdreq_sends__GFX104PLUS = 0x00000109, - DB_PERF_SEL_CB_DB_rdreq_prt_sends__GFX104PLUS = 0x0000010a, - DB_PERF_SEL_CB_DB_wrreq_sends__GFX104PLUS = 0x0000010b, - DB_PERF_SEL_CB_DB_wrreq_prt_sends__GFX104PLUS = 0x0000010c, - DB_PERF_SEL_DB_CB_rdret_ack__GFX104PLUS = 0x0000010d, - DB_PERF_SEL_DB_CB_rdret_nack__GFX104PLUS = 0x0000010e, - DB_PERF_SEL_DB_CB_wrret_ack__GFX104PLUS = 0x0000010f, - DB_PERF_SEL_DB_CB_wrret_nack__GFX104PLUS = 0x00000110, - DB_PERF_SEL_unmapped_z_tile_culled__GFX104PLUS = 0x00000115, - DB_PERF_SEL_DB_CB_context_dones__GFX104PLUS = 0x0000011f, - DB_PERF_SEL_DB_CB_eop_dones__GFX104PLUS = 0x00000120, - DB_PERF_SEL_SX_DB_quad_all_pixels_killed__GFX104PLUS = 0x00000121, - DB_PERF_SEL_SX_DB_quad_all_pixels_enabled__GFX104PLUS = 0x00000122, - DB_PERF_SEL_SX_DB_quad_need_blending_and_dst_read__GFX104PLUS = 0x00000123, - DB_PERF_SEL_SC_DB_tile_backface__GFX104PLUS = 0x00000124, - DB_PERF_SEL_SC_DB_quad_quads__GFX104PLUS = 0x00000125, - DB_PERF_SEL_DB_SC_quad_quads_with_1_pixel__GFX104PLUS = 0x00000126, - DB_PERF_SEL_DB_SC_quad_quads_with_2_pixels__GFX104PLUS = 0x00000127, - DB_PERF_SEL_DB_SC_quad_quads_with_3_pixels__GFX104PLUS = 0x00000128, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX104PLUS = 0x00000129, - DB_PERF_SEL_DB_SC_quad_double_quad__GFX104PLUS = 0x0000012a, - DB_PERF_SEL_SX_DB_quad_export_quads__GFX104PLUS = 0x0000012b, - DB_PERF_SEL_SX_DB_quad_double_format__GFX104PLUS = 0x0000012c, - DB_PERF_SEL_SX_DB_quad_fast_format__GFX104PLUS = 0x0000012d, - 
DB_PERF_SEL_SX_DB_quad_slow_format__GFX104PLUS = 0x0000012e, - DB_PERF_SEL_quad_rd_sends_unc__GFX104PLUS = 0x0000012f, - DB_PERF_SEL_quad_rd_mi_stall_unc__GFX104PLUS = 0x00000130, - DB_PERF_SEL_SC_DB_tile_tiles_pipe0__GFX104PLUS = 0x00000131, - DB_PERF_SEL_SC_DB_tile_tiles_pipe1__GFX104PLUS = 0x00000132, - DB_PERF_SEL_SC_DB_quad_quads_pipe0__GFX104PLUS = 0x00000133, - DB_PERF_SEL_SC_DB_quad_quads_pipe1__GFX104PLUS = 0x00000134, - DB_PERF_SEL_PERF_fg_lob_fwdr_timeout_hits__GFX104PLUS = 0x00000135, - DB_PERF_SEL_RMI_rd_tile_32byte_req__GFX104PLUS = 0x0000013b, - DB_PERF_SEL_RMI_rd_z_32byte_req__GFX104PLUS = 0x0000013c, - DB_PERF_SEL_RMI_rd_s_32byte_req__GFX104PLUS = 0x0000013d, - DB_PERF_SEL_RMI_wr_tile_32byte_req__GFX104PLUS = 0x0000013e, - DB_PERF_SEL_RMI_wr_z_32byte_req__GFX104PLUS = 0x0000013f, - DB_PERF_SEL_RMI_wr_s_32byte_req__GFX104PLUS = 0x00000140, - DB_PERF_SEL_RMI_wr_psdzpc_32byte_req__GFX104PLUS = 0x00000141, - DB_PERF_SEL_RMI_rd_tile_32byte_ret__GFX104PLUS = 0x00000142, - DB_PERF_SEL_RMI_rd_z_32byte_ret__GFX104PLUS = 0x00000143, - DB_PERF_SEL_RMI_rd_s_32byte_ret__GFX104PLUS = 0x00000144, - DB_PERF_SEL_RMI_wr_tile_32byte_ack__GFX104PLUS = 0x00000145, - DB_PERF_SEL_RMI_wr_z_32byte_ack__GFX104PLUS = 0x00000146, - DB_PERF_SEL_RMI_wr_s_32byte_ack__GFX104PLUS = 0x00000147, - DB_PERF_SEL_RMI_wr_psdzpc_32byte_ack__GFX104PLUS = 0x00000148, - DB_PERF_SEL_esr_vic_sqq_busy__GFX104PLUS = 0x00000149, - DB_PERF_SEL_esr_vic_sqq_stall__GFX104PLUS = 0x0000014a, - DB_PERF_SEL_esr_psi_vic_tile_rate__GFX104PLUS = 0x0000014b, - DB_PERF_SEL_esr_vic_footprint_match_2x2__GFX104PLUS = 0x0000014c, - DB_PERF_SEL_esr_vic_footprint_match_2x1__GFX104PLUS = 0x0000014d, - DB_PERF_SEL_esr_vic_footprint_match_1x2__GFX104PLUS = 0x0000014e, - DB_PERF_SEL_DB_SC_quad_num_null_2x2_coarse_pixels__GFX104PLUS = 0x0000014f, - DB_PERF_SEL_DB_SC_quad_num_null_2x1_coarse_pixels__GFX104PLUS = 0x00000150, - DB_PERF_SEL_DB_SC_quad_num_null_1x2_coarse_pixels__GFX104PLUS = 0x00000151, - 
DB_PERF_SEL_hi_z_s_checker_force_coarse_vrs_1x1__GFX104PLUS = 0x00000152, - DB_PERF_SEL_hi_z_s_checker_force_ssaa_vrs_1x1__GFX104PLUS = 0x00000153, - DB_PERF_SEL_esr_ps_woc_1squadIn_2squadOut__GFX104PLUS = 0x00000154, - DB_PERF_SEL_esr_ps_woc_2squadIn_1squadOut__GFX104PLUS = 0x00000155, -#endif - Spare_261__GFX10CORE = 0x00000105, - DB_PERF_SEL_DB_CB_lquad_export_quads__GFX10CORE = 0x00000106, - DB_PERF_SEL_DB_CB_lquad_double_format__GFX10CORE = 0x00000107, - DB_PERF_SEL_DB_CB_lquad_fast_format__GFX10CORE = 0x00000108, - DB_PERF_SEL_DB_CB_lquad_slow_format__GFX10CORE = 0x00000109, - DB_PERF_SEL_CB_DB_rdreq_sends__GFX10CORE = 0x0000010a, - DB_PERF_SEL_CB_DB_rdreq_prt_sends__GFX10CORE = 0x0000010b, - DB_PERF_SEL_CB_DB_wrreq_sends__GFX10CORE = 0x0000010c, - DB_PERF_SEL_CB_DB_wrreq_prt_sends__GFX10CORE = 0x0000010d, - DB_PERF_SEL_DB_CB_rdret_ack__GFX10CORE = 0x0000010e, - DB_PERF_SEL_DB_CB_rdret_nack__GFX10CORE = 0x0000010f, - DB_PERF_SEL_DB_CB_wrret_ack__GFX10CORE = 0x00000110, - DB_PERF_SEL_DB_CB_wrret_nack__GFX10CORE = 0x00000111, - Spare_274__GFX10CORE = 0x00000112, - DB_PERF_SEL_DFSM_Stall_opmode_change__GFX10CORE = 0x00000113, - DB_PERF_SEL_DFSM_Stall_cam_fifo__GFX10CORE = 0x00000114, - DB_PERF_SEL_DFSM_Stall_bypass_fifo__GFX10CORE = 0x00000115, - DB_PERF_SEL_DFSM_Stall_retained_tile_fifo__GFX10CORE = 0x00000116, - DB_PERF_SEL_DFSM_Stall_control_fifo__GFX10CORE = 0x00000117, - DB_PERF_SEL_DFSM_Stall_overflow_counter__GFX10CORE = 0x00000118, - DB_PERF_SEL_DFSM_Stall_pops_stall_overflow__GFX10CORE = 0x00000119, - DB_PERF_SEL_DFSM_Stall_pops_stall_self_flush__GFX10CORE = 0x0000011a, - DB_PERF_SEL_DFSM_Stall_middle_output__GFX10CORE = 0x0000011b, - DB_PERF_SEL_DFSM_Stall_stalling_general__GFX10CORE = 0x0000011c, - Spare_285__GFX10CORE = 0x0000011d, - Spare_286__GFX10CORE = 0x0000011e, - DB_PERF_SEL_DFSM_prez_killed_squad__GFX10CORE = 0x0000011f, - DB_PERF_SEL_DFSM_squads_in__GFX10CORE = 0x00000120, - DB_PERF_SEL_DFSM_full_cleared_squads_out__GFX10CORE = 0x00000121, - 
DB_PERF_SEL_DFSM_quads_in__GFX10CORE = 0x00000122, - DB_PERF_SEL_DFSM_fully_cleared_quads_out__GFX10CORE = 0x00000123, - DB_PERF_SEL_DFSM_lit_pixels_in__GFX10CORE = 0x00000124, - DB_PERF_SEL_DFSM_fully_cleared_pixels_out__GFX10CORE = 0x00000125, - DB_PERF_SEL_DFSM_lit_samples_in__GFX10CORE = 0x00000126, - DB_PERF_SEL_DFSM_lit_samples_out__GFX10CORE = 0x00000127, - DB_PERF_SEL_DFSM_evicted_tiles_above_watermark__GFX10CORE = 0x00000128, - DB_PERF_SEL_DFSM_cant_accept_squads_but_not_stalled_by_downstream__GFX10CORE = 0x00000129, - DB_PERF_SEL_DFSM_stalled_by_downstream__GFX10CORE = 0x0000012a, - DB_PERF_SEL_DFSM_evicted_squads_above_watermark__GFX10CORE = 0x0000012b, - DB_PERF_SEL_DFSM_collisions_due_to_POPS_overflow__GFX10CORE = 0x0000012c, - DB_PERF_SEL_DFSM_collisions_detected_within_POPS_FIFO__GFX10CORE = 0x0000012d, - DB_PERF_SEL_DFSM_evicted_squads_due_to_prim_watermark__GFX10CORE = 0x0000012e, - DB_PERF_SEL_unmapped_z_tile_culled__GFX10CORE = 0x00000133, - DB_PERF_SEL_DB_CB_tile_is_event_FLUSH_AND_INV_DB_DATA_TS__GFX10CORE = 0x00000134, - DB_PERF_SEL_DB_CB_tile_is_event_FLUSH_AND_INV_CB_PIXEL_DATA__GFX10CORE = 0x00000135, - DB_PERF_SEL_DB_CB_tile_is_event_BOTTOM_OF_PIPE_TS__GFX10CORE = 0x00000136, - DB_PERF_SEL_DB_CB_tile_waiting_for_perfcounter_stop_event__GFX10CORE = 0x00000137, - DB_PERF_SEL_DB_CB_lquad_fmt_32bpp_8pix__GFX10CORE = 0x00000138, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_unsigned_8pix__GFX10CORE = 0x00000139, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_signed_8pix__GFX10CORE = 0x0000013a, - DB_PERF_SEL_DB_CB_lquad_fmt_16_16_float_8pix__GFX10CORE = 0x0000013b, - DB_PERF_SEL_DB_CB_lquad_num_pixels_need_blending__GFX10CORE = 0x0000013c, - DB_PERF_SEL_DB_CB_context_dones__GFX10CORE = 0x0000013d, - DB_PERF_SEL_DB_CB_eop_dones__GFX10CORE = 0x0000013e, - DB_PERF_SEL_SX_DB_quad_all_pixels_killed__GFX10CORE = 0x0000013f, - DB_PERF_SEL_SX_DB_quad_all_pixels_enabled__GFX10CORE = 0x00000140, - DB_PERF_SEL_SX_DB_quad_need_blending_and_dst_read__GFX10CORE = 0x00000141, - 
DB_PERF_SEL_SC_DB_tile_backface__GFX10CORE = 0x00000142, - DB_PERF_SEL_SC_DB_quad_quads__GFX10CORE = 0x00000143, - DB_PERF_SEL_DB_SC_quad_quads_with_1_pixel__GFX10CORE = 0x00000144, - DB_PERF_SEL_DB_SC_quad_quads_with_2_pixels__GFX10CORE = 0x00000145, - DB_PERF_SEL_DB_SC_quad_quads_with_3_pixels__GFX10CORE = 0x00000146, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX10CORE = 0x00000147, - DB_PERF_SEL_DB_SC_quad_double_quad__GFX10CORE = 0x00000156, - DB_PERF_SEL_SX_DB_quad_export_quads__GFX10CORE = 0x00000157, - DB_PERF_SEL_SX_DB_quad_double_format__GFX10CORE = 0x00000158, - DB_PERF_SEL_SX_DB_quad_fast_format__GFX10CORE = 0x00000159, - DB_PERF_SEL_SX_DB_quad_slow_format__GFX10CORE = 0x0000015a, - DB_PERF_SEL_quad_rd_sends_unc__GFX10CORE = 0x0000015b, - DB_PERF_SEL_quad_rd_mi_stall_unc__GFX10CORE = 0x0000015c, - DB_PERF_SEL_DFSM_OutputPunch__GFX10CORE = 0x0000015d, - DB_PERF_SEL_DFSM_OutputPops__GFX10CORE = 0x0000015e, - DB_PERF_SEL_DFSM_OutputFifo__GFX10CORE = 0x0000015f, - DB_PERF_SEL_DFSM_StallOpmodeChange__GFX10CORE = 0x00000160, - DB_PERF_SEL_DFSM_StallCAMFifoFull__GFX10CORE = 0x00000161, - DB_PERF_SEL_DFSM_StallBypassFifoFull__GFX10CORE = 0x00000162, - DB_PERF_SEL_DFSM_StallRetainedTileFifoFull__GFX10CORE = 0x00000163, - DB_PERF_SEL_DFSM_StallControlFifoFull__GFX10CORE = 0x00000164, - DB_PERF_SEL_DFSM_StallControlCountFull__GFX10CORE = 0x00000165, - DB_PERF_SEL_DFSM_StallOverflowMaximum__GFX10CORE = 0x00000166, - DB_PERF_SEL_DFSM_StallPopsStallOverflow__GFX10CORE = 0x00000167, - DB_PERF_SEL_DFSM_StallPopsStallSelfStall__GFX10CORE = 0x00000168, - DB_PERF_SEL_DFSM_StallCamSlotFlush__GFX10CORE = 0x00000169, - DB_PERF_SEL_DFSM_StallOutput__GFX10CORE = 0x0000016a, - DB_PERF_SEL_DFSM_WatchdogTrigger__GFX10CORE = 0x0000016b, - DB_PERF_SEL_DFSM_StallOnPOPSStall__GFX10CORE = 0x0000016c, - DB_PERF_SEL_SC_DB_tile_tiles_pipe0__GFX10CORE = 0x0000016d, - DB_PERF_SEL_SC_DB_tile_tiles_pipe1__GFX10CORE = 0x0000016e, - DB_PERF_SEL_SC_DB_quad_quads_pipe0__GFX10CORE = 
0x0000016f, - DB_PERF_SEL_SC_DB_quad_quads_pipe1__GFX10CORE = 0x00000170, - DB_PERF_SEL_hiz_tile_culled__GFX10PLUS = 0x00000008, - DB_PERF_SEL_his_tile_culled__GFX10PLUS = 0x00000009, - DB_PERF_SEL_prezl_tile_mem_stall__GFX10PLUS = 0x000000d2, - DB_PERF_SEL_depth_bounds_tile_culled__GFX10PLUS = 0x000000f3, - DB_PERF_SEL_DB_SC_s_tile_rate__GFX10PLUS = 0x00000102, - DB_PERF_SEL_DB_SC_c_tile_rate__GFX10PLUS = 0x00000103, - DB_PERF_SEL_DB_SC_z_tile_rate__GFX10PLUS = 0x00000104, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - DB_PERF_SEL_DB_CB_export_events__GFX11 = 0x00000022, - DB_PERF_SEL_DB_CB_export_sends__GFX11 = 0x0000002c, - DB_PERF_SEL_DB_CB_export_busy__GFX11 = 0x0000002d, - DB_PERF_SEL_DB_CB_export_stalls__GFX11 = 0x0000002e, - DB_PERF_SEL_DB_CB_export_quads__GFX11 = 0x0000002f, - DB_PERF_SEL_DB_CB_export_export_quads__GFX11 = 0x00000105, - DB_PERF_SEL_DB_CB_export_double_format__GFX11 = 0x00000106, - DB_PERF_SEL_DB_CB_export_fast_format__GFX11 = 0x00000107, - DB_PERF_SEL_DB_CB_export_slow_format__GFX11 = 0x00000108, - DB_PERF_SEL_MI_tile_req_wrack_counter_stall__GFX11 = 0x00000111, - DB_PERF_SEL_MI_quad_req_wrack_counter_stall__GFX11 = 0x00000112, - DB_PERF_SEL_MI_zpc_req_wrack_counter_stall__GFX11 = 0x00000113, - DB_PERF_SEL_MI_psd_req_wrack_counter_stall__GFX11 = 0x00000114, - DB_PERF_SEL_DB_CB_export_is_event_FLUSH_AND_INV_DB_DATA_TS__GFX11 = 0x00000116, - DB_PERF_SEL_DB_CB_export_is_event_FLUSH_AND_INV_CB_PIXEL_DATA__GFX11 = 0x00000117, - DB_PERF_SEL_DB_CB_export_is_event_BOTTOM_OF_PIPE_TS__GFX11 = 0x00000118, - DB_PERF_SEL_DB_CB_export_waiting_for_perfcounter_stop_event__GFX11 = 0x00000119, - DB_PERF_SEL_DB_CB_export_fmt_32bpp_8pix__GFX11 = 0x0000011a, - DB_PERF_SEL_DB_CB_export_fmt_16_16_unsigned_8pix__GFX11 = 0x0000011b, - DB_PERF_SEL_DB_CB_export_fmt_16_16_signed_8pix__GFX11 = 0x0000011c, - DB_PERF_SEL_DB_CB_export_fmt_16_16_float_8pix__GFX11 = 0x0000011d, - DB_PERF_SEL_DB_CB_export_num_pixels_need_blending__GFX11 = 
0x0000011e, - DB_PERF_SEL_noz_waiting_for_postz_done__GFX11 = 0x00000136, - DB_PERF_SEL_DB_CB_export_quads_vrs_rate_1x1__GFX11 = 0x00000137, - DB_PERF_SEL_DB_CB_export_quads_vrs_rate_2x1__GFX11 = 0x00000138, - DB_PERF_SEL_DB_CB_export_quads_vrs_rate_1x2__GFX11 = 0x00000139, - DB_PERF_SEL_DB_CB_export_quads_vrs_rate_2x2__GFX11 = 0x0000013a, - DB_PERF_SEL_prez_ps_invoked_pixel_cnt__GFX11 = 0x00000156, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX11 = 0x00000157, - DB_PERF_SEL_DB_SC_quad_noz_tiles__GFX11 = 0x0000015b, - DB_PERF_SEL_DB_SC_quad_lit_noz_quad__GFX11 = 0x0000015c, - DB_PERF_SEL_DB_SC_quad_conflicts__GFX11 = 0x0000015d, - DB_PERF_SEL_SC_DB_quad_vrs_1x1__GFX11 = 0x0000015e, - DB_PERF_SEL_SC_DB_quad_vrs_1x2__GFX11 = 0x0000015f, - DB_PERF_SEL_SC_DB_quad_vrs_2x1__GFX11 = 0x00000160, - DB_PERF_SEL_SC_DB_quad_vrs_2x2__GFX11 = 0x00000161, - DB_PERF_SEL_SC_DB_quad_vrs_2x_ssaa__GFX11 = 0x00000162, - DB_PERF_SEL_SC_DB_quad_vrs_4x_ssaa__GFX11 = 0x00000163, - DB_PERF_SEL_SC_DB_quad_vrs_8x_ssaa__GFX11 = 0x00000164, - DB_PERF_SEL_SC_DB_wave_sends__GFX11 = 0x00000165, - DB_PERF_SEL_SC_DB_wave_busy__GFX11 = 0x00000166, - DB_PERF_SEL_SC_DB_wave_quads__GFX11 = 0x00000167, - DB_PERF_SEL_SC_DB_wave_id_wrapped__GFX11 = 0x00000168, - DB_PERF_SEL_DB_SC_wave_sends__GFX11 = 0x00000169, - DB_PERF_SEL_DB_SC_wave_busy__GFX11 = 0x0000016a, - DB_PERF_SEL_DB_SC_wave_stalls__GFX11 = 0x0000016b, - DB_PERF_SEL_DB_SC_wave_conflict__GFX11 = 0x0000016c, - DB_PERF_SEL_DB_SC_wave_hard_conflict__GFX11 = 0x0000016d, - DB_PERF_SEL_DB_SC_wave_id_wrapped__GFX11 = 0x0000016e, - DB_PERF_SEL_SX_DB_quad_waves__GFX11 = 0x0000016f, - DB_PERF_SEL_OREO_TT_load__GFX11 = 0x00000173, - DB_PERF_SEL_OREO_TT_read__GFX11 = 0x00000174, - DB_PERF_SEL_OREO_TT_stalls__GFX11 = 0x00000175, - DB_PERF_SEL_OREO_ST_load__GFX11 = 0x00000176, - DB_PERF_SEL_OREO_ST_read__GFX11 = 0x00000177, - DB_PERF_SEL_OREO_ST_stalls__GFX11 = 0x00000178, - DB_PERF_SEL_OREO_WT_load__GFX11 = 0x00000179, - DB_PERF_SEL_OREO_WT_read__GFX11 = 
0x0000017a, - DB_PERF_SEL_OREO_SB_misses__GFX11 = 0x0000017b, - DB_PERF_SEL_OREO_SB_hits__GFX11 = 0x0000017c, - DB_PERF_SEL_OREO_SB_evicts__GFX11 = 0x0000017d, - DB_PERF_SEL_OREO_SB_stalls__GFX11 = 0x0000017e, - DB_PERF_SEL_OREO_Events_load__GFX11 = 0x0000017f, - DB_PERF_SEL_OREO_Events_transition__GFX11 = 0x00000180, - DB_PERF_SEL_OREO_Events_non_transition__GFX11 = 0x00000181, - DB_PERF_SEL_OREO_Events_delayed__GFX11 = 0x00000182, - DB_PERF_SEL_OREO_Events_stalls__GFX11 = 0x00000183, -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - DB_PERF_SEL_ts_events_pws_enable__HASPWS = 0x00000158, - DB_PERF_SEL_ps_events_pws_enable__HASPWS = 0x00000159, - DB_PERF_SEL_cs_events_pws_enable__HASPWS = 0x0000015a, - DB_PERF_SEL_pws_stall__HASPWS = 0x00000170, - DB_PERF_SEL_pws_liveness_stall_dtt_tag__HASPWS = 0x00000171, - DB_PERF_SEL_pws_liveness_stall_tcp_cache_mgr__HASPWS = 0x00000172, -#endif -} PerfCounter_Vals; - -constexpr unsigned int MaxPerfcounterValsGfx09 = DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09; -constexpr unsigned int MaxPerfcounterValsGfx101 = DB_PERF_SEL_FG_LOB_FWDR_TIMEOUT_hits__GFX101; -constexpr unsigned int MaxPerfcounterValsGfx103 = DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxPerfcounterValsGfx11 = DB_PERF_SEL_OREO_Events_stalls__GFX11; -#endif - -typedef enum PERFMON_CNTOFF_AND_OR { -#if CHIP_HDR_PHOENIX1 - PERFMON_CNTOFF_OR__APU11 = 0x00000000, - PERFMON_CNTOFF_AND__APU11 = 0x00000001, -#endif - PERFMON_CNTOFF_OR__GFX101 = 0x00000000, - PERFMON_CNTOFF_AND__GFX101 = 0x00000001, -#if CHIP_HDR_NAVI21 - PERFMON_CNTOFF_OR__NV21 = 0x00000000, - PERFMON_CNTOFF_AND__NV21 = 0x00000001, -#endif -#if CHIP_HDR_NAVI22 - PERFMON_CNTOFF_OR__NV22 = 0x00000000, - PERFMON_CNTOFF_AND__NV22 = 0x00000001, -#endif -#if CHIP_HDR_NAVI23 - PERFMON_CNTOFF_OR__NV23 = 0x00000000, - PERFMON_CNTOFF_AND__NV23 = 0x00000001, -#endif 
-#if CHIP_HDR_NAVI24 - PERFMON_CNTOFF_OR__NV24 = 0x00000000, - PERFMON_CNTOFF_AND__NV24 = 0x00000001, -#endif -#if CHIP_HDR_NAVI31 - PERFMON_CNTOFF_OR__NV31 = 0x00000000, - PERFMON_CNTOFF_AND__NV31 = 0x00000001, -#endif - PERFMON_CNTOFF_OR__RAPHAEL = 0x00000000, - PERFMON_CNTOFF_AND__RAPHAEL = 0x00000001, - PERFMON_CNTOFF_OR__REMBRANDT = 0x00000000, - PERFMON_CNTOFF_AND__REMBRANDT = 0x00000001, - PERFMON_CNTOFF_OR__VG12_RN = 0x00000000, - PERFMON_CNTOFF_AND__VG12_RN = 0x00000001, -} PERFMON_CNTOFF_AND_OR; - -typedef enum PERFMON_CNTOFF_INT_EN { -#if CHIP_HDR_PHOENIX1 - PERFMON_CNTOFF_INT_DISABLE__APU11 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__APU11 = 0x00000001, -#endif - PERFMON_CNTOFF_INT_DISABLE__GFX101 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__GFX101 = 0x00000001, -#if CHIP_HDR_NAVI21 - PERFMON_CNTOFF_INT_DISABLE__NV21 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__NV21 = 0x00000001, -#endif -#if CHIP_HDR_NAVI22 - PERFMON_CNTOFF_INT_DISABLE__NV22 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__NV22 = 0x00000001, -#endif -#if CHIP_HDR_NAVI23 - PERFMON_CNTOFF_INT_DISABLE__NV23 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__NV23 = 0x00000001, -#endif -#if CHIP_HDR_NAVI24 - PERFMON_CNTOFF_INT_DISABLE__NV24 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__NV24 = 0x00000001, -#endif -#if CHIP_HDR_NAVI31 - PERFMON_CNTOFF_INT_DISABLE__NV31 = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__NV31 = 0x00000001, -#endif - PERFMON_CNTOFF_INT_DISABLE__RAPHAEL = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__RAPHAEL = 0x00000001, - PERFMON_CNTOFF_INT_DISABLE__REMBRANDT = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__REMBRANDT = 0x00000001, - PERFMON_CNTOFF_INT_DISABLE__VG12_RN = 0x00000000, - PERFMON_CNTOFF_INT_ENABLE__VG12_RN = 0x00000001, -} PERFMON_CNTOFF_INT_EN; - -typedef enum PERFMON_CNTOFF_INT_TYPE { -#if CHIP_HDR_PHOENIX1 - PERFMON_CNTOFF_INT_TYPE_LEVEL__APU11 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__APU11 = 0x00000001, -#endif - PERFMON_CNTOFF_INT_TYPE_LEVEL__GFX101 = 0x00000000, - 
PERFMON_CNTOFF_INT_TYPE_PULSE__GFX101 = 0x00000001, -#if CHIP_HDR_NAVI21 - PERFMON_CNTOFF_INT_TYPE_LEVEL__NV21 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__NV21 = 0x00000001, -#endif -#if CHIP_HDR_NAVI22 - PERFMON_CNTOFF_INT_TYPE_LEVEL__NV22 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__NV22 = 0x00000001, -#endif -#if CHIP_HDR_NAVI23 - PERFMON_CNTOFF_INT_TYPE_LEVEL__NV23 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__NV23 = 0x00000001, -#endif -#if CHIP_HDR_NAVI24 - PERFMON_CNTOFF_INT_TYPE_LEVEL__NV24 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__NV24 = 0x00000001, -#endif -#if CHIP_HDR_NAVI31 - PERFMON_CNTOFF_INT_TYPE_LEVEL__NV31 = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__NV31 = 0x00000001, -#endif - PERFMON_CNTOFF_INT_TYPE_LEVEL__RAPHAEL = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__RAPHAEL = 0x00000001, - PERFMON_CNTOFF_INT_TYPE_LEVEL__REMBRANDT = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__REMBRANDT = 0x00000001, - PERFMON_CNTOFF_INT_TYPE_LEVEL__VG12_RN = 0x00000000, - PERFMON_CNTOFF_INT_TYPE_PULSE__VG12_RN = 0x00000001, -} PERFMON_CNTOFF_INT_TYPE; - -typedef enum PERFMON_COUNTER_MODE { - PERFMON_COUNTER_MODE_ACCUM = 0x00000000, - PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x00000001, - PERFMON_COUNTER_MODE_MAX = 0x00000002, - PERFMON_COUNTER_MODE_DIRTY = 0x00000003, - PERFMON_COUNTER_MODE_SAMPLE = 0x00000004, - PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x00000005, - PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x00000006, - PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x00000007, - PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x00000008, - PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x00000009, - PERFMON_COUNTER_MODE_RESERVED = 0x0000000f, -} PERFMON_COUNTER_MODE; - -typedef enum PERFMON_SPM_MODE { - PERFMON_SPM_MODE_OFF = 0x00000000, - PERFMON_SPM_MODE_16BIT_CLAMP = 0x00000001, - PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x00000002, - PERFMON_SPM_MODE_32BIT_CLAMP = 0x00000003, - PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x00000004, - PERFMON_SPM_MODE_RESERVED_5 = 0x00000005, - 
PERFMON_SPM_MODE_RESERVED_6 = 0x00000006, - PERFMON_SPM_MODE_RESERVED_7 = 0x00000007, - PERFMON_SPM_MODE_TEST_MODE_0 = 0x00000008, - PERFMON_SPM_MODE_TEST_MODE_1 = 0x00000009, - PERFMON_SPM_MODE_TEST_MODE_2 = 0x0000000a, -} PERFMON_SPM_MODE; - -typedef enum PERFMON_STATE { -#if CHIP_HDR_PHOENIX1 - PERFMON_STATE_RESET__APU11 = 0x00000000, - PERFMON_STATE_START__APU11 = 0x00000001, - PERFMON_STATE_FREEZE__APU11 = 0x00000002, - PERFMON_STATE_HW__APU11 = 0x00000003, -#endif - PERFMON_STATE_RESET__GFX101 = 0x00000000, - PERFMON_STATE_START__GFX101 = 0x00000001, - PERFMON_STATE_FREEZE__GFX101 = 0x00000002, - PERFMON_STATE_HW__GFX101 = 0x00000003, -#if CHIP_HDR_NAVI21 - PERFMON_STATE_RESET__NV21 = 0x00000000, - PERFMON_STATE_START__NV21 = 0x00000001, - PERFMON_STATE_FREEZE__NV21 = 0x00000002, - PERFMON_STATE_HW__NV21 = 0x00000003, -#endif -#if CHIP_HDR_NAVI22 - PERFMON_STATE_RESET__NV22 = 0x00000000, - PERFMON_STATE_START__NV22 = 0x00000001, - PERFMON_STATE_FREEZE__NV22 = 0x00000002, - PERFMON_STATE_HW__NV22 = 0x00000003, -#endif -#if CHIP_HDR_NAVI23 - PERFMON_STATE_RESET__NV23 = 0x00000000, - PERFMON_STATE_START__NV23 = 0x00000001, - PERFMON_STATE_FREEZE__NV23 = 0x00000002, - PERFMON_STATE_HW__NV23 = 0x00000003, -#endif -#if CHIP_HDR_NAVI24 - PERFMON_STATE_RESET__NV24 = 0x00000000, - PERFMON_STATE_START__NV24 = 0x00000001, - PERFMON_STATE_FREEZE__NV24 = 0x00000002, - PERFMON_STATE_HW__NV24 = 0x00000003, -#endif -#if CHIP_HDR_NAVI31 - PERFMON_STATE_RESET__NV31 = 0x00000000, - PERFMON_STATE_START__NV31 = 0x00000001, - PERFMON_STATE_FREEZE__NV31 = 0x00000002, - PERFMON_STATE_HW__NV31 = 0x00000003, -#endif - PERFMON_STATE_RESET__RAPHAEL = 0x00000000, - PERFMON_STATE_START__RAPHAEL = 0x00000001, - PERFMON_STATE_FREEZE__RAPHAEL = 0x00000002, - PERFMON_STATE_HW__RAPHAEL = 0x00000003, - PERFMON_STATE_RESET__REMBRANDT = 0x00000000, - PERFMON_STATE_START__REMBRANDT = 0x00000001, - PERFMON_STATE_FREEZE__REMBRANDT = 0x00000002, - PERFMON_STATE_HW__REMBRANDT = 0x00000003, - 
PERFMON_STATE_RESET__VG12_RN = 0x00000000, - PERFMON_STATE_START__VG12_RN = 0x00000001, - PERFMON_STATE_FREEZE__VG12_RN = 0x00000002, - PERFMON_STATE_HW__VG12_RN = 0x00000003, -} PERFMON_STATE; - -typedef enum PH_PERFCNT_SEL { - PH_PERF_SEL_SC0_SRPS_WINDOW_VALID = 0x00000000, - PH_PERF_SEL_SC0_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000001, - PH_PERF_SEL_SC0_ARB_XFC_ONLY_PRIM_CYCLES = 0x00000002, - PH_PERF_SEL_SC0_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x00000003, - PH_PERF_SEL_SC0_ARB_STALLED_FROM_BELOW = 0x00000004, - PH_PERF_SEL_SC0_ARB_STARVED_FROM_ABOVE = 0x00000005, - PH_PERF_SEL_SC0_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x00000006, - PH_PERF_SEL_SC0_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x00000007, - PH_PERF_SEL_SC0_ARB_BUSY = 0x00000008, - PH_PERF_SEL_SC0_ARB_PA_BUSY_SOP = 0x00000009, - PH_PERF_SEL_SC0_ARB_EOP_POP_SYNC_POP = 0x0000000a, - PH_PERF_SEL_SC0_ARB_EVENT_SYNC_POP = 0x0000000b, - PH_PERF_SEL_SC0_PS_ENG_MULTICYCLE_BUBBLE = 0x0000000c, - PH_PERF_SEL_SC0_EOP_SYNC_WINDOW = 0x0000000d, - PH_PERF_SEL_SC0_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x0000000e, - PH_PERF_SEL_SC0_BUSY_CNT_NOT_ZERO = 0x0000000f, - PH_PERF_SEL_SC0_SEND = 0x00000010, - PH_PERF_SEL_SC0_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000011, - PH_PERF_SEL_SC0_CREDIT_AT_MAX = 0x00000012, - PH_PERF_SEL_SC0_CREDIT_AT_MAX_NO_PENDING_SEND = 0x00000013, - PH_PERF_SEL_SC0_PA0_DATA_FIFO_RD = 0x00000018, - PH_PERF_SEL_SC0_PA0_DATA_FIFO_WE = 0x00000019, - PH_PERF_SEL_SC0_PA0_FIFO_EMPTY = 0x0000001a, - PH_PERF_SEL_SC0_PA0_FIFO_FULL = 0x0000001b, - PH_PERF_SEL_SC0_PA0_NULL_WE = 0x0000001c, - PH_PERF_SEL_SC0_PA0_EVENT_WE = 0x0000001d, - PH_PERF_SEL_SC0_PA0_FPOV_WE = 0x0000001e, - PH_PERF_SEL_SC0_PA0_EOP_WE = 0x00000020, - PH_PERF_SEL_SC0_PA0_DATA_FIFO_EOP_RD = 0x00000021, - PH_PERF_SEL_SC0_PA0_EOPG_WE = 0x00000022, - PH_PERF_SEL_SC0_PA1_DATA_FIFO_RD = 0x00000024, - PH_PERF_SEL_SC0_PA1_DATA_FIFO_WE = 0x00000025, - PH_PERF_SEL_SC0_PA1_FIFO_EMPTY = 0x00000026, - 
PH_PERF_SEL_SC0_PA1_FIFO_FULL = 0x00000027, - PH_PERF_SEL_SC0_PA1_NULL_WE = 0x00000028, - PH_PERF_SEL_SC0_PA1_EVENT_WE = 0x00000029, - PH_PERF_SEL_SC0_PA1_FPOV_WE = 0x0000002a, - PH_PERF_SEL_SC0_PA1_EOP_WE = 0x0000002c, - PH_PERF_SEL_SC0_PA1_DATA_FIFO_EOP_RD = 0x0000002d, - PH_PERF_SEL_SC0_PA1_EOPG_WE = 0x0000002e, - PH_PERF_SEL_SC0_PA2_DATA_FIFO_RD = 0x00000030, - PH_PERF_SEL_SC0_PA2_DATA_FIFO_WE = 0x00000031, - PH_PERF_SEL_SC0_PA2_FIFO_EMPTY = 0x00000032, - PH_PERF_SEL_SC0_PA2_FIFO_FULL = 0x00000033, - PH_PERF_SEL_SC0_PA2_NULL_WE = 0x00000034, - PH_PERF_SEL_SC0_PA2_EVENT_WE = 0x00000035, - PH_PERF_SEL_SC0_PA2_FPOV_WE = 0x00000036, - PH_PERF_SEL_SC0_PA2_EOP_WE = 0x00000038, - PH_PERF_SEL_SC0_PA2_DATA_FIFO_EOP_RD = 0x00000039, - PH_PERF_SEL_SC0_PA2_EOPG_WE = 0x0000003a, - PH_PERF_SEL_SC0_PA3_DATA_FIFO_RD = 0x0000003c, - PH_PERF_SEL_SC0_PA3_DATA_FIFO_WE = 0x0000003d, - PH_PERF_SEL_SC0_PA3_FIFO_EMPTY = 0x0000003e, - PH_PERF_SEL_SC0_PA3_FIFO_FULL = 0x0000003f, - PH_PERF_SEL_SC0_PA3_NULL_WE = 0x00000040, - PH_PERF_SEL_SC0_PA3_EVENT_WE = 0x00000041, - PH_PERF_SEL_SC0_PA3_FPOV_WE = 0x00000042, - PH_PERF_SEL_SC0_PA3_EOP_WE = 0x00000044, - PH_PERF_SEL_SC0_PA3_DATA_FIFO_EOP_RD = 0x00000045, - PH_PERF_SEL_SC0_PA3_EOPG_WE = 0x00000046, - PH_PERF_SEL_SC0_PA4_DATA_FIFO_RD = 0x00000048, - PH_PERF_SEL_SC0_PA4_DATA_FIFO_WE = 0x00000049, - PH_PERF_SEL_SC0_PA4_FIFO_EMPTY = 0x0000004a, - PH_PERF_SEL_SC0_PA4_FIFO_FULL = 0x0000004b, - PH_PERF_SEL_SC0_PA4_NULL_WE = 0x0000004c, - PH_PERF_SEL_SC0_PA4_EVENT_WE = 0x0000004d, - PH_PERF_SEL_SC0_PA4_FPOV_WE = 0x0000004e, - PH_PERF_SEL_SC0_PA4_EOP_WE = 0x00000050, - PH_PERF_SEL_SC0_PA4_DATA_FIFO_EOP_RD = 0x00000051, - PH_PERF_SEL_SC0_PA4_EOPG_WE = 0x00000052, - PH_PERF_SEL_SC0_PA5_DATA_FIFO_RD = 0x00000054, - PH_PERF_SEL_SC0_PA5_DATA_FIFO_WE = 0x00000055, - PH_PERF_SEL_SC0_PA5_FIFO_EMPTY = 0x00000056, - PH_PERF_SEL_SC0_PA5_FIFO_FULL = 0x00000057, - PH_PERF_SEL_SC0_PA5_NULL_WE = 0x00000058, - PH_PERF_SEL_SC0_PA5_EVENT_WE = 0x00000059, - 
PH_PERF_SEL_SC0_PA5_FPOV_WE = 0x0000005a, - PH_PERF_SEL_SC0_PA5_EOP_WE = 0x0000005c, - PH_PERF_SEL_SC0_PA5_DATA_FIFO_EOP_RD = 0x0000005d, - PH_PERF_SEL_SC0_PA5_EOPG_WE = 0x0000005e, - PH_PERF_SEL_SC0_PA6_DATA_FIFO_RD = 0x00000060, - PH_PERF_SEL_SC0_PA6_DATA_FIFO_WE = 0x00000061, - PH_PERF_SEL_SC0_PA6_FIFO_EMPTY = 0x00000062, - PH_PERF_SEL_SC0_PA6_FIFO_FULL = 0x00000063, - PH_PERF_SEL_SC0_PA6_NULL_WE = 0x00000064, - PH_PERF_SEL_SC0_PA6_EVENT_WE = 0x00000065, - PH_PERF_SEL_SC0_PA6_FPOV_WE = 0x00000066, - PH_PERF_SEL_SC0_PA6_EOP_WE = 0x00000068, - PH_PERF_SEL_SC0_PA6_DATA_FIFO_EOP_RD = 0x00000069, - PH_PERF_SEL_SC0_PA6_EOPG_WE = 0x0000006a, - PH_PERF_SEL_SC0_PA7_DATA_FIFO_RD = 0x0000006c, - PH_PERF_SEL_SC0_PA7_DATA_FIFO_WE = 0x0000006d, - PH_PERF_SEL_SC0_PA7_FIFO_EMPTY = 0x0000006e, - PH_PERF_SEL_SC0_PA7_FIFO_FULL = 0x0000006f, - PH_PERF_SEL_SC0_PA7_NULL_WE = 0x00000070, - PH_PERF_SEL_SC0_PA7_EVENT_WE = 0x00000071, - PH_PERF_SEL_SC0_PA7_FPOV_WE = 0x00000072, - PH_PERF_SEL_SC0_PA7_EOP_WE = 0x00000074, - PH_PERF_SEL_SC0_PA7_DATA_FIFO_EOP_RD = 0x00000075, - PH_PERF_SEL_SC0_PA7_EOPG_WE = 0x00000076, - PH_PERF_SEL_SC1_SRPS_WINDOW_VALID = 0x00000078, - PH_PERF_SEL_SC1_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000079, - PH_PERF_SEL_SC1_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000007a, - PH_PERF_SEL_SC1_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000007b, - PH_PERF_SEL_SC1_ARB_STALLED_FROM_BELOW = 0x0000007c, - PH_PERF_SEL_SC1_ARB_STARVED_FROM_ABOVE = 0x0000007d, - PH_PERF_SEL_SC1_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x0000007e, - PH_PERF_SEL_SC1_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x0000007f, - PH_PERF_SEL_SC1_ARB_BUSY = 0x00000080, - PH_PERF_SEL_SC1_ARB_PA_BUSY_SOP = 0x00000081, - PH_PERF_SEL_SC1_ARB_EOP_POP_SYNC_POP = 0x00000082, - PH_PERF_SEL_SC1_ARB_EVENT_SYNC_POP = 0x00000083, - PH_PERF_SEL_SC1_PS_ENG_MULTICYCLE_BUBBLE = 0x00000084, - PH_PERF_SEL_SC1_EOP_SYNC_WINDOW = 0x00000085, - PH_PERF_SEL_SC1_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000086, - 
PH_PERF_SEL_SC1_BUSY_CNT_NOT_ZERO = 0x00000087, - PH_PERF_SEL_SC1_SEND = 0x00000088, - PH_PERF_SEL_SC1_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000089, - PH_PERF_SEL_SC1_CREDIT_AT_MAX = 0x0000008a, - PH_PERF_SEL_SC1_CREDIT_AT_MAX_NO_PENDING_SEND = 0x0000008b, - PH_PERF_SEL_SC1_PA0_DATA_FIFO_RD = 0x00000090, - PH_PERF_SEL_SC1_PA0_DATA_FIFO_WE = 0x00000091, - PH_PERF_SEL_SC1_PA0_FIFO_EMPTY = 0x00000092, - PH_PERF_SEL_SC1_PA0_FIFO_FULL = 0x00000093, - PH_PERF_SEL_SC1_PA0_NULL_WE = 0x00000094, - PH_PERF_SEL_SC1_PA0_EVENT_WE = 0x00000095, - PH_PERF_SEL_SC1_PA0_FPOV_WE = 0x00000096, - PH_PERF_SEL_SC1_PA0_EOP_WE = 0x00000098, - PH_PERF_SEL_SC1_PA0_DATA_FIFO_EOP_RD = 0x00000099, - PH_PERF_SEL_SC1_PA0_EOPG_WE = 0x0000009a, - PH_PERF_SEL_SC1_PA1_DATA_FIFO_RD = 0x0000009c, - PH_PERF_SEL_SC1_PA1_DATA_FIFO_WE = 0x0000009d, - PH_PERF_SEL_SC1_PA1_FIFO_EMPTY = 0x0000009e, - PH_PERF_SEL_SC1_PA1_FIFO_FULL = 0x0000009f, - PH_PERF_SEL_SC1_PA1_NULL_WE = 0x000000a0, - PH_PERF_SEL_SC1_PA1_EVENT_WE = 0x000000a1, - PH_PERF_SEL_SC1_PA1_FPOV_WE = 0x000000a2, - PH_PERF_SEL_SC1_PA1_EOP_WE = 0x000000a4, - PH_PERF_SEL_SC1_PA1_DATA_FIFO_EOP_RD = 0x000000a5, - PH_PERF_SEL_SC1_PA1_EOPG_WE = 0x000000a6, - PH_PERF_SEL_SC1_PA2_DATA_FIFO_RD = 0x000000a8, - PH_PERF_SEL_SC1_PA2_DATA_FIFO_WE = 0x000000a9, - PH_PERF_SEL_SC1_PA2_FIFO_EMPTY = 0x000000aa, - PH_PERF_SEL_SC1_PA2_FIFO_FULL = 0x000000ab, - PH_PERF_SEL_SC1_PA2_NULL_WE = 0x000000ac, - PH_PERF_SEL_SC1_PA2_EVENT_WE = 0x000000ad, - PH_PERF_SEL_SC1_PA2_FPOV_WE = 0x000000ae, - PH_PERF_SEL_SC1_PA2_EOP_WE = 0x000000b0, - PH_PERF_SEL_SC1_PA2_DATA_FIFO_EOP_RD = 0x000000b1, - PH_PERF_SEL_SC1_PA2_EOPG_WE = 0x000000b2, - PH_PERF_SEL_SC1_PA3_DATA_FIFO_RD = 0x000000b4, - PH_PERF_SEL_SC1_PA3_DATA_FIFO_WE = 0x000000b5, - PH_PERF_SEL_SC1_PA3_FIFO_EMPTY = 0x000000b6, - PH_PERF_SEL_SC1_PA3_FIFO_FULL = 0x000000b7, - PH_PERF_SEL_SC1_PA3_NULL_WE = 0x000000b8, - PH_PERF_SEL_SC1_PA3_EVENT_WE = 0x000000b9, - PH_PERF_SEL_SC1_PA3_FPOV_WE = 0x000000ba, - 
PH_PERF_SEL_SC1_PA3_EOP_WE = 0x000000bc, - PH_PERF_SEL_SC1_PA3_DATA_FIFO_EOP_RD = 0x000000bd, - PH_PERF_SEL_SC1_PA3_EOPG_WE = 0x000000be, - PH_PERF_SEL_SC1_PA4_DATA_FIFO_RD = 0x000000c0, - PH_PERF_SEL_SC1_PA4_DATA_FIFO_WE = 0x000000c1, - PH_PERF_SEL_SC1_PA4_FIFO_EMPTY = 0x000000c2, - PH_PERF_SEL_SC1_PA4_FIFO_FULL = 0x000000c3, - PH_PERF_SEL_SC1_PA4_NULL_WE = 0x000000c4, - PH_PERF_SEL_SC1_PA4_EVENT_WE = 0x000000c5, - PH_PERF_SEL_SC1_PA4_FPOV_WE = 0x000000c6, - PH_PERF_SEL_SC1_PA4_EOP_WE = 0x000000c8, - PH_PERF_SEL_SC1_PA4_DATA_FIFO_EOP_RD = 0x000000c9, - PH_PERF_SEL_SC1_PA4_EOPG_WE = 0x000000ca, - PH_PERF_SEL_SC1_PA5_DATA_FIFO_RD = 0x000000cc, - PH_PERF_SEL_SC1_PA5_DATA_FIFO_WE = 0x000000cd, - PH_PERF_SEL_SC1_PA5_FIFO_EMPTY = 0x000000ce, - PH_PERF_SEL_SC1_PA5_FIFO_FULL = 0x000000cf, - PH_PERF_SEL_SC1_PA5_NULL_WE = 0x000000d0, - PH_PERF_SEL_SC1_PA5_EVENT_WE = 0x000000d1, - PH_PERF_SEL_SC1_PA5_FPOV_WE = 0x000000d2, - PH_PERF_SEL_SC1_PA5_EOP_WE = 0x000000d4, - PH_PERF_SEL_SC1_PA5_DATA_FIFO_EOP_RD = 0x000000d5, - PH_PERF_SEL_SC1_PA5_EOPG_WE = 0x000000d6, - PH_PERF_SEL_SC1_PA6_DATA_FIFO_RD = 0x000000d8, - PH_PERF_SEL_SC1_PA6_DATA_FIFO_WE = 0x000000d9, - PH_PERF_SEL_SC1_PA6_FIFO_EMPTY = 0x000000da, - PH_PERF_SEL_SC1_PA6_FIFO_FULL = 0x000000db, - PH_PERF_SEL_SC1_PA6_NULL_WE = 0x000000dc, - PH_PERF_SEL_SC1_PA6_EVENT_WE = 0x000000dd, - PH_PERF_SEL_SC1_PA6_FPOV_WE = 0x000000de, - PH_PERF_SEL_SC1_PA6_EOP_WE = 0x000000e0, - PH_PERF_SEL_SC1_PA6_DATA_FIFO_EOP_RD = 0x000000e1, - PH_PERF_SEL_SC1_PA6_EOPG_WE = 0x000000e2, - PH_PERF_SEL_SC1_PA7_DATA_FIFO_RD = 0x000000e4, - PH_PERF_SEL_SC1_PA7_DATA_FIFO_WE = 0x000000e5, - PH_PERF_SEL_SC1_PA7_FIFO_EMPTY = 0x000000e6, - PH_PERF_SEL_SC1_PA7_FIFO_FULL = 0x000000e7, - PH_PERF_SEL_SC1_PA7_NULL_WE = 0x000000e8, - PH_PERF_SEL_SC1_PA7_EVENT_WE = 0x000000e9, - PH_PERF_SEL_SC1_PA7_FPOV_WE = 0x000000ea, - PH_PERF_SEL_SC1_PA7_EOP_WE = 0x000000ec, - PH_PERF_SEL_SC1_PA7_DATA_FIFO_EOP_RD = 0x000000ed, - PH_PERF_SEL_SC1_PA7_EOPG_WE = 0x000000ee, - 
PH_PERF_SEL_SC2_SRPS_WINDOW_VALID = 0x000000f0, - PH_PERF_SEL_SC2_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x000000f1, - PH_PERF_SEL_SC2_ARB_XFC_ONLY_PRIM_CYCLES = 0x000000f2, - PH_PERF_SEL_SC2_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x000000f3, - PH_PERF_SEL_SC2_ARB_STALLED_FROM_BELOW = 0x000000f4, - PH_PERF_SEL_SC2_ARB_STARVED_FROM_ABOVE = 0x000000f5, - PH_PERF_SEL_SC2_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x000000f6, - PH_PERF_SEL_SC2_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x000000f7, - PH_PERF_SEL_SC2_ARB_BUSY = 0x000000f8, - PH_PERF_SEL_SC2_ARB_PA_BUSY_SOP = 0x000000f9, - PH_PERF_SEL_SC2_ARB_EOP_POP_SYNC_POP = 0x000000fa, - PH_PERF_SEL_SC2_ARB_EVENT_SYNC_POP = 0x000000fb, - PH_PERF_SEL_SC2_PS_ENG_MULTICYCLE_BUBBLE = 0x000000fc, - PH_PERF_SEL_SC2_EOP_SYNC_WINDOW = 0x000000fd, - PH_PERF_SEL_SC2_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x000000fe, - PH_PERF_SEL_SC2_BUSY_CNT_NOT_ZERO = 0x000000ff, - PH_PERF_SEL_SC2_SEND = 0x00000100, - PH_PERF_SEL_SC2_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000101, - PH_PERF_SEL_SC2_CREDIT_AT_MAX = 0x00000102, - PH_PERF_SEL_SC2_CREDIT_AT_MAX_NO_PENDING_SEND = 0x00000103, - PH_PERF_SEL_SC2_PA0_DATA_FIFO_RD = 0x00000108, - PH_PERF_SEL_SC2_PA0_DATA_FIFO_WE = 0x00000109, - PH_PERF_SEL_SC2_PA0_FIFO_EMPTY = 0x0000010a, - PH_PERF_SEL_SC2_PA0_FIFO_FULL = 0x0000010b, - PH_PERF_SEL_SC2_PA0_NULL_WE = 0x0000010c, - PH_PERF_SEL_SC2_PA0_EVENT_WE = 0x0000010d, - PH_PERF_SEL_SC2_PA0_FPOV_WE = 0x0000010e, - PH_PERF_SEL_SC2_PA0_EOP_WE = 0x00000110, - PH_PERF_SEL_SC2_PA0_DATA_FIFO_EOP_RD = 0x00000111, - PH_PERF_SEL_SC2_PA0_EOPG_WE = 0x00000112, - PH_PERF_SEL_SC2_PA1_DATA_FIFO_RD = 0x00000114, - PH_PERF_SEL_SC2_PA1_DATA_FIFO_WE = 0x00000115, - PH_PERF_SEL_SC2_PA1_FIFO_EMPTY = 0x00000116, - PH_PERF_SEL_SC2_PA1_FIFO_FULL = 0x00000117, - PH_PERF_SEL_SC2_PA1_NULL_WE = 0x00000118, - PH_PERF_SEL_SC2_PA1_EVENT_WE = 0x00000119, - PH_PERF_SEL_SC2_PA1_FPOV_WE = 0x0000011a, - PH_PERF_SEL_SC2_PA1_EOP_WE = 0x0000011c, - 
PH_PERF_SEL_SC2_PA1_DATA_FIFO_EOP_RD = 0x0000011d, - PH_PERF_SEL_SC2_PA1_EOPG_WE = 0x0000011e, - PH_PERF_SEL_SC2_PA2_DATA_FIFO_RD = 0x00000120, - PH_PERF_SEL_SC2_PA2_DATA_FIFO_WE = 0x00000121, - PH_PERF_SEL_SC2_PA2_FIFO_EMPTY = 0x00000122, - PH_PERF_SEL_SC2_PA2_FIFO_FULL = 0x00000123, - PH_PERF_SEL_SC2_PA2_NULL_WE = 0x00000124, - PH_PERF_SEL_SC2_PA2_EVENT_WE = 0x00000125, - PH_PERF_SEL_SC2_PA2_FPOV_WE = 0x00000126, - PH_PERF_SEL_SC2_PA2_EOP_WE = 0x00000128, - PH_PERF_SEL_SC2_PA2_DATA_FIFO_EOP_RD = 0x00000129, - PH_PERF_SEL_SC2_PA2_EOPG_WE = 0x0000012a, - PH_PERF_SEL_SC2_PA3_DATA_FIFO_RD = 0x0000012c, - PH_PERF_SEL_SC2_PA3_DATA_FIFO_WE = 0x0000012d, - PH_PERF_SEL_SC2_PA3_FIFO_EMPTY = 0x0000012e, - PH_PERF_SEL_SC2_PA3_FIFO_FULL = 0x0000012f, - PH_PERF_SEL_SC2_PA3_NULL_WE = 0x00000130, - PH_PERF_SEL_SC2_PA3_EVENT_WE = 0x00000131, - PH_PERF_SEL_SC2_PA3_FPOV_WE = 0x00000132, - PH_PERF_SEL_SC2_PA3_EOP_WE = 0x00000134, - PH_PERF_SEL_SC2_PA3_DATA_FIFO_EOP_RD = 0x00000135, - PH_PERF_SEL_SC2_PA3_EOPG_WE = 0x00000136, - PH_PERF_SEL_SC2_PA4_DATA_FIFO_RD = 0x00000138, - PH_PERF_SEL_SC2_PA4_DATA_FIFO_WE = 0x00000139, - PH_PERF_SEL_SC2_PA4_FIFO_EMPTY = 0x0000013a, - PH_PERF_SEL_SC2_PA4_FIFO_FULL = 0x0000013b, - PH_PERF_SEL_SC2_PA4_NULL_WE = 0x0000013c, - PH_PERF_SEL_SC2_PA4_EVENT_WE = 0x0000013d, - PH_PERF_SEL_SC2_PA4_FPOV_WE = 0x0000013e, - PH_PERF_SEL_SC2_PA4_EOP_WE = 0x00000140, - PH_PERF_SEL_SC2_PA4_DATA_FIFO_EOP_RD = 0x00000141, - PH_PERF_SEL_SC2_PA4_EOPG_WE = 0x00000142, - PH_PERF_SEL_SC2_PA5_DATA_FIFO_RD = 0x00000144, - PH_PERF_SEL_SC2_PA5_DATA_FIFO_WE = 0x00000145, - PH_PERF_SEL_SC2_PA5_FIFO_EMPTY = 0x00000146, - PH_PERF_SEL_SC2_PA5_FIFO_FULL = 0x00000147, - PH_PERF_SEL_SC2_PA5_NULL_WE = 0x00000148, - PH_PERF_SEL_SC2_PA5_EVENT_WE = 0x00000149, - PH_PERF_SEL_SC2_PA5_FPOV_WE = 0x0000014a, - PH_PERF_SEL_SC2_PA5_EOP_WE = 0x0000014c, - PH_PERF_SEL_SC2_PA5_DATA_FIFO_EOP_RD = 0x0000014d, - PH_PERF_SEL_SC2_PA5_EOPG_WE = 0x0000014e, - PH_PERF_SEL_SC2_PA6_DATA_FIFO_RD = 0x00000150, 
- PH_PERF_SEL_SC2_PA6_DATA_FIFO_WE = 0x00000151, - PH_PERF_SEL_SC2_PA6_FIFO_EMPTY = 0x00000152, - PH_PERF_SEL_SC2_PA6_FIFO_FULL = 0x00000153, - PH_PERF_SEL_SC2_PA6_NULL_WE = 0x00000154, - PH_PERF_SEL_SC2_PA6_EVENT_WE = 0x00000155, - PH_PERF_SEL_SC2_PA6_FPOV_WE = 0x00000156, - PH_PERF_SEL_SC2_PA6_EOP_WE = 0x00000158, - PH_PERF_SEL_SC2_PA6_DATA_FIFO_EOP_RD = 0x00000159, - PH_PERF_SEL_SC2_PA6_EOPG_WE = 0x0000015a, - PH_PERF_SEL_SC2_PA7_DATA_FIFO_RD = 0x0000015c, - PH_PERF_SEL_SC2_PA7_DATA_FIFO_WE = 0x0000015d, - PH_PERF_SEL_SC2_PA7_FIFO_EMPTY = 0x0000015e, - PH_PERF_SEL_SC2_PA7_FIFO_FULL = 0x0000015f, - PH_PERF_SEL_SC2_PA7_NULL_WE = 0x00000160, - PH_PERF_SEL_SC2_PA7_EVENT_WE = 0x00000161, - PH_PERF_SEL_SC2_PA7_FPOV_WE = 0x00000162, - PH_PERF_SEL_SC2_PA7_EOP_WE = 0x00000164, - PH_PERF_SEL_SC2_PA7_DATA_FIFO_EOP_RD = 0x00000165, - PH_PERF_SEL_SC2_PA7_EOPG_WE = 0x00000166, - PH_PERF_SEL_SC3_SRPS_WINDOW_VALID = 0x00000168, - PH_PERF_SEL_SC3_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000169, - PH_PERF_SEL_SC3_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000016a, - PH_PERF_SEL_SC3_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000016b, - PH_PERF_SEL_SC3_ARB_STALLED_FROM_BELOW = 0x0000016c, - PH_PERF_SEL_SC3_ARB_STARVED_FROM_ABOVE = 0x0000016d, - PH_PERF_SEL_SC3_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x0000016e, - PH_PERF_SEL_SC3_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x0000016f, - PH_PERF_SEL_SC3_ARB_BUSY = 0x00000170, - PH_PERF_SEL_SC3_ARB_PA_BUSY_SOP = 0x00000171, - PH_PERF_SEL_SC3_ARB_EOP_POP_SYNC_POP = 0x00000172, - PH_PERF_SEL_SC3_ARB_EVENT_SYNC_POP = 0x00000173, - PH_PERF_SEL_SC3_PS_ENG_MULTICYCLE_BUBBLE = 0x00000174, - PH_PERF_SEL_SC3_EOP_SYNC_WINDOW = 0x00000175, - PH_PERF_SEL_SC3_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000176, - PH_PERF_SEL_SC3_BUSY_CNT_NOT_ZERO = 0x00000177, - PH_PERF_SEL_SC3_SEND = 0x00000178, - PH_PERF_SEL_SC3_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000179, - PH_PERF_SEL_SC3_CREDIT_AT_MAX = 0x0000017a, - PH_PERF_SEL_SC3_CREDIT_AT_MAX_NO_PENDING_SEND = 
0x0000017b, - PH_PERF_SEL_SC3_PA0_DATA_FIFO_RD = 0x00000180, - PH_PERF_SEL_SC3_PA0_DATA_FIFO_WE = 0x00000181, - PH_PERF_SEL_SC3_PA0_FIFO_EMPTY = 0x00000182, - PH_PERF_SEL_SC3_PA0_FIFO_FULL = 0x00000183, - PH_PERF_SEL_SC3_PA0_NULL_WE = 0x00000184, - PH_PERF_SEL_SC3_PA0_EVENT_WE = 0x00000185, - PH_PERF_SEL_SC3_PA0_FPOV_WE = 0x00000186, - PH_PERF_SEL_SC3_PA0_EOP_WE = 0x00000188, - PH_PERF_SEL_SC3_PA0_DATA_FIFO_EOP_RD = 0x00000189, - PH_PERF_SEL_SC3_PA0_EOPG_WE = 0x0000018a, - PH_PERF_SEL_SC3_PA1_DATA_FIFO_RD = 0x0000018c, - PH_PERF_SEL_SC3_PA1_DATA_FIFO_WE = 0x0000018d, - PH_PERF_SEL_SC3_PA1_FIFO_EMPTY = 0x0000018e, - PH_PERF_SEL_SC3_PA1_FIFO_FULL = 0x0000018f, - PH_PERF_SEL_SC3_PA1_NULL_WE = 0x00000190, - PH_PERF_SEL_SC3_PA1_EVENT_WE = 0x00000191, - PH_PERF_SEL_SC3_PA1_FPOV_WE = 0x00000192, - PH_PERF_SEL_SC3_PA1_EOP_WE = 0x00000194, - PH_PERF_SEL_SC3_PA1_DATA_FIFO_EOP_RD = 0x00000195, - PH_PERF_SEL_SC3_PA1_EOPG_WE = 0x00000196, - PH_PERF_SEL_SC3_PA2_DATA_FIFO_RD = 0x00000198, - PH_PERF_SEL_SC3_PA2_DATA_FIFO_WE = 0x00000199, - PH_PERF_SEL_SC3_PA2_FIFO_EMPTY = 0x0000019a, - PH_PERF_SEL_SC3_PA2_FIFO_FULL = 0x0000019b, - PH_PERF_SEL_SC3_PA2_NULL_WE = 0x0000019c, - PH_PERF_SEL_SC3_PA2_EVENT_WE = 0x0000019d, - PH_PERF_SEL_SC3_PA2_FPOV_WE = 0x0000019e, - PH_PERF_SEL_SC3_PA2_EOP_WE = 0x000001a0, - PH_PERF_SEL_SC3_PA2_DATA_FIFO_EOP_RD = 0x000001a1, - PH_PERF_SEL_SC3_PA2_EOPG_WE = 0x000001a2, - PH_PERF_SEL_SC3_PA3_DATA_FIFO_RD = 0x000001a4, - PH_PERF_SEL_SC3_PA3_DATA_FIFO_WE = 0x000001a5, - PH_PERF_SEL_SC3_PA3_FIFO_EMPTY = 0x000001a6, - PH_PERF_SEL_SC3_PA3_FIFO_FULL = 0x000001a7, - PH_PERF_SEL_SC3_PA3_NULL_WE = 0x000001a8, - PH_PERF_SEL_SC3_PA3_EVENT_WE = 0x000001a9, - PH_PERF_SEL_SC3_PA3_FPOV_WE = 0x000001aa, - PH_PERF_SEL_SC3_PA3_EOP_WE = 0x000001ac, - PH_PERF_SEL_SC3_PA3_DATA_FIFO_EOP_RD = 0x000001ad, - PH_PERF_SEL_SC3_PA3_EOPG_WE = 0x000001ae, - PH_PERF_SEL_SC3_PA4_DATA_FIFO_RD = 0x000001b0, - PH_PERF_SEL_SC3_PA4_DATA_FIFO_WE = 0x000001b1, - PH_PERF_SEL_SC3_PA4_FIFO_EMPTY 
= 0x000001b2, - PH_PERF_SEL_SC3_PA4_FIFO_FULL = 0x000001b3, - PH_PERF_SEL_SC3_PA4_NULL_WE = 0x000001b4, - PH_PERF_SEL_SC3_PA4_EVENT_WE = 0x000001b5, - PH_PERF_SEL_SC3_PA4_FPOV_WE = 0x000001b6, - PH_PERF_SEL_SC3_PA4_EOP_WE = 0x000001b8, - PH_PERF_SEL_SC3_PA4_DATA_FIFO_EOP_RD = 0x000001b9, - PH_PERF_SEL_SC3_PA4_EOPG_WE = 0x000001ba, - PH_PERF_SEL_SC3_PA5_DATA_FIFO_RD = 0x000001bc, - PH_PERF_SEL_SC3_PA5_DATA_FIFO_WE = 0x000001bd, - PH_PERF_SEL_SC3_PA5_FIFO_EMPTY = 0x000001be, - PH_PERF_SEL_SC3_PA5_FIFO_FULL = 0x000001bf, - PH_PERF_SEL_SC3_PA5_NULL_WE = 0x000001c0, - PH_PERF_SEL_SC3_PA5_EVENT_WE = 0x000001c1, - PH_PERF_SEL_SC3_PA5_FPOV_WE = 0x000001c2, - PH_PERF_SEL_SC3_PA5_EOP_WE = 0x000001c4, - PH_PERF_SEL_SC3_PA5_DATA_FIFO_EOP_RD = 0x000001c5, - PH_PERF_SEL_SC3_PA5_EOPG_WE = 0x000001c6, - PH_PERF_SEL_SC3_PA6_DATA_FIFO_RD = 0x000001c8, - PH_PERF_SEL_SC3_PA6_DATA_FIFO_WE = 0x000001c9, - PH_PERF_SEL_SC3_PA6_FIFO_EMPTY = 0x000001ca, - PH_PERF_SEL_SC3_PA6_FIFO_FULL = 0x000001cb, - PH_PERF_SEL_SC3_PA6_NULL_WE = 0x000001cc, - PH_PERF_SEL_SC3_PA6_EVENT_WE = 0x000001cd, - PH_PERF_SEL_SC3_PA6_FPOV_WE = 0x000001ce, - PH_PERF_SEL_SC3_PA6_EOP_WE = 0x000001d0, - PH_PERF_SEL_SC3_PA6_DATA_FIFO_EOP_RD = 0x000001d1, - PH_PERF_SEL_SC3_PA6_EOPG_WE = 0x000001d2, - PH_PERF_SEL_SC3_PA7_DATA_FIFO_RD = 0x000001d4, - PH_PERF_SEL_SC3_PA7_DATA_FIFO_WE = 0x000001d5, - PH_PERF_SEL_SC3_PA7_FIFO_EMPTY = 0x000001d6, - PH_PERF_SEL_SC3_PA7_FIFO_FULL = 0x000001d7, - PH_PERF_SEL_SC3_PA7_NULL_WE = 0x000001d8, - PH_PERF_SEL_SC3_PA7_EVENT_WE = 0x000001d9, - PH_PERF_SEL_SC3_PA7_FPOV_WE = 0x000001da, - PH_PERF_SEL_SC3_PA7_EOP_WE = 0x000001dc, - PH_PERF_SEL_SC3_PA7_DATA_FIFO_EOP_RD = 0x000001dd, - PH_PERF_SEL_SC3_PA7_EOPG_WE = 0x000001de, - PH_PERF_SEL_SC4_SRPS_WINDOW_VALID = 0x000001e0, - PH_PERF_SEL_SC4_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x000001e1, - PH_PERF_SEL_SC4_ARB_XFC_ONLY_PRIM_CYCLES = 0x000001e2, - PH_PERF_SEL_SC4_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x000001e3, - PH_PERF_SEL_SC4_ARB_STALLED_FROM_BELOW 
= 0x000001e4, - PH_PERF_SEL_SC4_ARB_STARVED_FROM_ABOVE = 0x000001e5, - PH_PERF_SEL_SC4_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x000001e6, - PH_PERF_SEL_SC4_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x000001e7, - PH_PERF_SEL_SC4_ARB_BUSY = 0x000001e8, - PH_PERF_SEL_SC4_ARB_PA_BUSY_SOP = 0x000001e9, - PH_PERF_SEL_SC4_ARB_EOP_POP_SYNC_POP = 0x000001ea, - PH_PERF_SEL_SC4_ARB_EVENT_SYNC_POP = 0x000001eb, - PH_PERF_SEL_SC4_PS_ENG_MULTICYCLE_BUBBLE = 0x000001ec, - PH_PERF_SEL_SC4_EOP_SYNC_WINDOW = 0x000001ed, - PH_PERF_SEL_SC4_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x000001ee, - PH_PERF_SEL_SC4_BUSY_CNT_NOT_ZERO = 0x000001ef, - PH_PERF_SEL_SC4_SEND = 0x000001f0, - PH_PERF_SEL_SC4_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x000001f1, - PH_PERF_SEL_SC4_CREDIT_AT_MAX = 0x000001f2, - PH_PERF_SEL_SC4_CREDIT_AT_MAX_NO_PENDING_SEND = 0x000001f3, - PH_PERF_SEL_SC4_PA0_DATA_FIFO_RD = 0x000001f8, - PH_PERF_SEL_SC4_PA0_DATA_FIFO_WE = 0x000001f9, - PH_PERF_SEL_SC4_PA0_FIFO_EMPTY = 0x000001fa, - PH_PERF_SEL_SC4_PA0_FIFO_FULL = 0x000001fb, - PH_PERF_SEL_SC4_PA0_NULL_WE = 0x000001fc, - PH_PERF_SEL_SC4_PA0_EVENT_WE = 0x000001fd, - PH_PERF_SEL_SC4_PA0_FPOV_WE = 0x000001fe, - PH_PERF_SEL_SC4_PA0_EOP_WE = 0x00000200, - PH_PERF_SEL_SC4_PA0_DATA_FIFO_EOP_RD = 0x00000201, - PH_PERF_SEL_SC4_PA0_EOPG_WE = 0x00000202, - PH_PERF_SEL_SC4_PA1_DATA_FIFO_RD = 0x00000204, - PH_PERF_SEL_SC4_PA1_DATA_FIFO_WE = 0x00000205, - PH_PERF_SEL_SC4_PA1_FIFO_EMPTY = 0x00000206, - PH_PERF_SEL_SC4_PA1_FIFO_FULL = 0x00000207, - PH_PERF_SEL_SC4_PA1_NULL_WE = 0x00000208, - PH_PERF_SEL_SC4_PA1_EVENT_WE = 0x00000209, - PH_PERF_SEL_SC4_PA1_FPOV_WE = 0x0000020a, - PH_PERF_SEL_SC4_PA1_EOP_WE = 0x0000020c, - PH_PERF_SEL_SC4_PA1_DATA_FIFO_EOP_RD = 0x0000020d, - PH_PERF_SEL_SC4_PA1_EOPG_WE = 0x0000020e, - PH_PERF_SEL_SC4_PA2_DATA_FIFO_RD = 0x00000210, - PH_PERF_SEL_SC4_PA2_DATA_FIFO_WE = 0x00000211, - PH_PERF_SEL_SC4_PA2_FIFO_EMPTY = 0x00000212, - PH_PERF_SEL_SC4_PA2_FIFO_FULL = 0x00000213, - 
PH_PERF_SEL_SC4_PA2_NULL_WE = 0x00000214, - PH_PERF_SEL_SC4_PA2_EVENT_WE = 0x00000215, - PH_PERF_SEL_SC4_PA2_FPOV_WE = 0x00000216, - PH_PERF_SEL_SC4_PA2_EOP_WE = 0x00000218, - PH_PERF_SEL_SC4_PA2_DATA_FIFO_EOP_RD = 0x00000219, - PH_PERF_SEL_SC4_PA2_EOPG_WE = 0x0000021a, - PH_PERF_SEL_SC4_PA3_DATA_FIFO_RD = 0x0000021c, - PH_PERF_SEL_SC4_PA3_DATA_FIFO_WE = 0x0000021d, - PH_PERF_SEL_SC4_PA3_FIFO_EMPTY = 0x0000021e, - PH_PERF_SEL_SC4_PA3_FIFO_FULL = 0x0000021f, - PH_PERF_SEL_SC4_PA3_NULL_WE = 0x00000220, - PH_PERF_SEL_SC4_PA3_EVENT_WE = 0x00000221, - PH_PERF_SEL_SC4_PA3_FPOV_WE = 0x00000222, - PH_PERF_SEL_SC4_PA3_EOP_WE = 0x00000224, - PH_PERF_SEL_SC4_PA3_DATA_FIFO_EOP_RD = 0x00000225, - PH_PERF_SEL_SC4_PA3_EOPG_WE = 0x00000226, - PH_PERF_SEL_SC4_PA4_DATA_FIFO_RD = 0x00000228, - PH_PERF_SEL_SC4_PA4_DATA_FIFO_WE = 0x00000229, - PH_PERF_SEL_SC4_PA4_FIFO_EMPTY = 0x0000022a, - PH_PERF_SEL_SC4_PA4_FIFO_FULL = 0x0000022b, - PH_PERF_SEL_SC4_PA4_NULL_WE = 0x0000022c, - PH_PERF_SEL_SC4_PA4_EVENT_WE = 0x0000022d, - PH_PERF_SEL_SC4_PA4_FPOV_WE = 0x0000022e, - PH_PERF_SEL_SC4_PA4_EOP_WE = 0x00000230, - PH_PERF_SEL_SC4_PA4_DATA_FIFO_EOP_RD = 0x00000231, - PH_PERF_SEL_SC4_PA4_EOPG_WE = 0x00000232, - PH_PERF_SEL_SC4_PA5_DATA_FIFO_RD = 0x00000234, - PH_PERF_SEL_SC4_PA5_DATA_FIFO_WE = 0x00000235, - PH_PERF_SEL_SC4_PA5_FIFO_EMPTY = 0x00000236, - PH_PERF_SEL_SC4_PA5_FIFO_FULL = 0x00000237, - PH_PERF_SEL_SC4_PA5_NULL_WE = 0x00000238, - PH_PERF_SEL_SC4_PA5_EVENT_WE = 0x00000239, - PH_PERF_SEL_SC4_PA5_FPOV_WE = 0x0000023a, - PH_PERF_SEL_SC4_PA5_EOP_WE = 0x0000023c, - PH_PERF_SEL_SC4_PA5_DATA_FIFO_EOP_RD = 0x0000023d, - PH_PERF_SEL_SC4_PA5_EOPG_WE = 0x0000023e, - PH_PERF_SEL_SC4_PA6_DATA_FIFO_RD = 0x00000240, - PH_PERF_SEL_SC4_PA6_DATA_FIFO_WE = 0x00000241, - PH_PERF_SEL_SC4_PA6_FIFO_EMPTY = 0x00000242, - PH_PERF_SEL_SC4_PA6_FIFO_FULL = 0x00000243, - PH_PERF_SEL_SC4_PA6_NULL_WE = 0x00000244, - PH_PERF_SEL_SC4_PA6_EVENT_WE = 0x00000245, - PH_PERF_SEL_SC4_PA6_FPOV_WE = 0x00000246, - 
PH_PERF_SEL_SC4_PA6_EOP_WE = 0x00000248, - PH_PERF_SEL_SC4_PA6_DATA_FIFO_EOP_RD = 0x00000249, - PH_PERF_SEL_SC4_PA6_EOPG_WE = 0x0000024a, - PH_PERF_SEL_SC4_PA7_DATA_FIFO_RD = 0x0000024c, - PH_PERF_SEL_SC4_PA7_DATA_FIFO_WE = 0x0000024d, - PH_PERF_SEL_SC4_PA7_FIFO_EMPTY = 0x0000024e, - PH_PERF_SEL_SC4_PA7_FIFO_FULL = 0x0000024f, - PH_PERF_SEL_SC4_PA7_NULL_WE = 0x00000250, - PH_PERF_SEL_SC4_PA7_EVENT_WE = 0x00000251, - PH_PERF_SEL_SC4_PA7_FPOV_WE = 0x00000252, - PH_PERF_SEL_SC4_PA7_EOP_WE = 0x00000254, - PH_PERF_SEL_SC4_PA7_DATA_FIFO_EOP_RD = 0x00000255, - PH_PERF_SEL_SC4_PA7_EOPG_WE = 0x00000256, - PH_PERF_SEL_SC5_SRPS_WINDOW_VALID = 0x00000258, - PH_PERF_SEL_SC5_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000259, - PH_PERF_SEL_SC5_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000025a, - PH_PERF_SEL_SC5_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000025b, - PH_PERF_SEL_SC5_ARB_STALLED_FROM_BELOW = 0x0000025c, - PH_PERF_SEL_SC5_ARB_STARVED_FROM_ABOVE = 0x0000025d, - PH_PERF_SEL_SC5_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x0000025e, - PH_PERF_SEL_SC5_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x0000025f, - PH_PERF_SEL_SC5_ARB_BUSY = 0x00000260, - PH_PERF_SEL_SC5_ARB_PA_BUSY_SOP = 0x00000261, - PH_PERF_SEL_SC5_ARB_EOP_POP_SYNC_POP = 0x00000262, - PH_PERF_SEL_SC5_ARB_EVENT_SYNC_POP = 0x00000263, - PH_PERF_SEL_SC5_PS_ENG_MULTICYCLE_BUBBLE = 0x00000264, - PH_PERF_SEL_SC5_EOP_SYNC_WINDOW = 0x00000265, - PH_PERF_SEL_SC5_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000266, - PH_PERF_SEL_SC5_BUSY_CNT_NOT_ZERO = 0x00000267, - PH_PERF_SEL_SC5_SEND = 0x00000268, - PH_PERF_SEL_SC5_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000269, - PH_PERF_SEL_SC5_CREDIT_AT_MAX = 0x0000026a, - PH_PERF_SEL_SC5_CREDIT_AT_MAX_NO_PENDING_SEND = 0x0000026b, - PH_PERF_SEL_SC5_PA0_DATA_FIFO_RD = 0x00000270, - PH_PERF_SEL_SC5_PA0_DATA_FIFO_WE = 0x00000271, - PH_PERF_SEL_SC5_PA0_FIFO_EMPTY = 0x00000272, - PH_PERF_SEL_SC5_PA0_FIFO_FULL = 0x00000273, - PH_PERF_SEL_SC5_PA0_NULL_WE = 0x00000274, - 
PH_PERF_SEL_SC5_PA0_EVENT_WE = 0x00000275, - PH_PERF_SEL_SC5_PA0_FPOV_WE = 0x00000276, - PH_PERF_SEL_SC5_PA0_EOP_WE = 0x00000278, - PH_PERF_SEL_SC5_PA0_DATA_FIFO_EOP_RD = 0x00000279, - PH_PERF_SEL_SC5_PA0_EOPG_WE = 0x0000027a, - PH_PERF_SEL_SC5_PA1_DATA_FIFO_RD = 0x0000027c, - PH_PERF_SEL_SC5_PA1_DATA_FIFO_WE = 0x0000027d, - PH_PERF_SEL_SC5_PA1_FIFO_EMPTY = 0x0000027e, - PH_PERF_SEL_SC5_PA1_FIFO_FULL = 0x0000027f, - PH_PERF_SEL_SC5_PA1_NULL_WE = 0x00000280, - PH_PERF_SEL_SC5_PA1_EVENT_WE = 0x00000281, - PH_PERF_SEL_SC5_PA1_FPOV_WE = 0x00000282, - PH_PERF_SEL_SC5_PA1_EOP_WE = 0x00000284, - PH_PERF_SEL_SC5_PA1_DATA_FIFO_EOP_RD = 0x00000285, - PH_PERF_SEL_SC5_PA1_EOPG_WE = 0x00000286, - PH_PERF_SEL_SC5_PA2_DATA_FIFO_RD = 0x00000288, - PH_PERF_SEL_SC5_PA2_DATA_FIFO_WE = 0x00000289, - PH_PERF_SEL_SC5_PA2_FIFO_EMPTY = 0x0000028a, - PH_PERF_SEL_SC5_PA2_FIFO_FULL = 0x0000028b, - PH_PERF_SEL_SC5_PA2_NULL_WE = 0x0000028c, - PH_PERF_SEL_SC5_PA2_EVENT_WE = 0x0000028d, - PH_PERF_SEL_SC5_PA2_FPOV_WE = 0x0000028e, - PH_PERF_SEL_SC5_PA2_EOP_WE = 0x00000290, - PH_PERF_SEL_SC5_PA2_DATA_FIFO_EOP_RD = 0x00000291, - PH_PERF_SEL_SC5_PA2_EOPG_WE = 0x00000292, - PH_PERF_SEL_SC5_PA3_DATA_FIFO_RD = 0x00000294, - PH_PERF_SEL_SC5_PA3_DATA_FIFO_WE = 0x00000295, - PH_PERF_SEL_SC5_PA3_FIFO_EMPTY = 0x00000296, - PH_PERF_SEL_SC5_PA3_FIFO_FULL = 0x00000297, - PH_PERF_SEL_SC5_PA3_NULL_WE = 0x00000298, - PH_PERF_SEL_SC5_PA3_EVENT_WE = 0x00000299, - PH_PERF_SEL_SC5_PA3_FPOV_WE = 0x0000029a, - PH_PERF_SEL_SC5_PA3_EOP_WE = 0x0000029c, - PH_PERF_SEL_SC5_PA3_DATA_FIFO_EOP_RD = 0x0000029d, - PH_PERF_SEL_SC5_PA3_EOPG_WE = 0x0000029e, - PH_PERF_SEL_SC5_PA4_DATA_FIFO_RD = 0x000002a0, - PH_PERF_SEL_SC5_PA4_DATA_FIFO_WE = 0x000002a1, - PH_PERF_SEL_SC5_PA4_FIFO_EMPTY = 0x000002a2, - PH_PERF_SEL_SC5_PA4_FIFO_FULL = 0x000002a3, - PH_PERF_SEL_SC5_PA4_NULL_WE = 0x000002a4, - PH_PERF_SEL_SC5_PA4_EVENT_WE = 0x000002a5, - PH_PERF_SEL_SC5_PA4_FPOV_WE = 0x000002a6, - PH_PERF_SEL_SC5_PA4_EOP_WE = 0x000002a8, - 
PH_PERF_SEL_SC5_PA4_DATA_FIFO_EOP_RD = 0x000002a9, - PH_PERF_SEL_SC5_PA4_EOPG_WE = 0x000002aa, - PH_PERF_SEL_SC5_PA5_DATA_FIFO_RD = 0x000002ac, - PH_PERF_SEL_SC5_PA5_DATA_FIFO_WE = 0x000002ad, - PH_PERF_SEL_SC5_PA5_FIFO_EMPTY = 0x000002ae, - PH_PERF_SEL_SC5_PA5_FIFO_FULL = 0x000002af, - PH_PERF_SEL_SC5_PA5_NULL_WE = 0x000002b0, - PH_PERF_SEL_SC5_PA5_EVENT_WE = 0x000002b1, - PH_PERF_SEL_SC5_PA5_FPOV_WE = 0x000002b2, - PH_PERF_SEL_SC5_PA5_EOP_WE = 0x000002b4, - PH_PERF_SEL_SC5_PA5_DATA_FIFO_EOP_RD = 0x000002b5, - PH_PERF_SEL_SC5_PA5_EOPG_WE = 0x000002b6, - PH_PERF_SEL_SC5_PA6_DATA_FIFO_RD = 0x000002b8, - PH_PERF_SEL_SC5_PA6_DATA_FIFO_WE = 0x000002b9, - PH_PERF_SEL_SC5_PA6_FIFO_EMPTY = 0x000002ba, - PH_PERF_SEL_SC5_PA6_FIFO_FULL = 0x000002bb, - PH_PERF_SEL_SC5_PA6_NULL_WE = 0x000002bc, - PH_PERF_SEL_SC5_PA6_EVENT_WE = 0x000002bd, - PH_PERF_SEL_SC5_PA6_FPOV_WE = 0x000002be, - PH_PERF_SEL_SC5_PA6_EOP_WE = 0x000002c0, - PH_PERF_SEL_SC5_PA6_DATA_FIFO_EOP_RD = 0x000002c1, - PH_PERF_SEL_SC5_PA6_EOPG_WE = 0x000002c2, - PH_PERF_SEL_SC5_PA7_DATA_FIFO_RD = 0x000002c4, - PH_PERF_SEL_SC5_PA7_DATA_FIFO_WE = 0x000002c5, - PH_PERF_SEL_SC5_PA7_FIFO_EMPTY = 0x000002c6, - PH_PERF_SEL_SC5_PA7_FIFO_FULL = 0x000002c7, - PH_PERF_SEL_SC5_PA7_NULL_WE = 0x000002c8, - PH_PERF_SEL_SC5_PA7_EVENT_WE = 0x000002c9, - PH_PERF_SEL_SC5_PA7_FPOV_WE = 0x000002ca, - PH_PERF_SEL_SC5_PA7_EOP_WE = 0x000002cc, - PH_PERF_SEL_SC5_PA7_DATA_FIFO_EOP_RD = 0x000002cd, - PH_PERF_SEL_SC5_PA7_EOPG_WE = 0x000002ce, - PH_PERF_SEL_SC6_SRPS_WINDOW_VALID = 0x000002d0, - PH_PERF_SEL_SC6_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x000002d1, - PH_PERF_SEL_SC6_ARB_XFC_ONLY_PRIM_CYCLES = 0x000002d2, - PH_PERF_SEL_SC6_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x000002d3, - PH_PERF_SEL_SC6_ARB_STALLED_FROM_BELOW = 0x000002d4, - PH_PERF_SEL_SC6_ARB_STARVED_FROM_ABOVE = 0x000002d5, - PH_PERF_SEL_SC6_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x000002d6, - PH_PERF_SEL_SC6_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x000002d7, - 
PH_PERF_SEL_SC6_ARB_BUSY = 0x000002d8, - PH_PERF_SEL_SC6_ARB_PA_BUSY_SOP = 0x000002d9, - PH_PERF_SEL_SC6_ARB_EOP_POP_SYNC_POP = 0x000002da, - PH_PERF_SEL_SC6_ARB_EVENT_SYNC_POP = 0x000002db, - PH_PERF_SEL_SC6_PS_ENG_MULTICYCLE_BUBBLE = 0x000002dc, - PH_PERF_SEL_SC6_EOP_SYNC_WINDOW = 0x000002dd, - PH_PERF_SEL_SC6_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x000002de, - PH_PERF_SEL_SC6_BUSY_CNT_NOT_ZERO = 0x000002df, - PH_PERF_SEL_SC6_SEND = 0x000002e0, - PH_PERF_SEL_SC6_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x000002e1, - PH_PERF_SEL_SC6_CREDIT_AT_MAX = 0x000002e2, - PH_PERF_SEL_SC6_CREDIT_AT_MAX_NO_PENDING_SEND = 0x000002e3, - PH_PERF_SEL_SC6_PA0_DATA_FIFO_RD = 0x000002e8, - PH_PERF_SEL_SC6_PA0_DATA_FIFO_WE = 0x000002e9, - PH_PERF_SEL_SC6_PA0_FIFO_EMPTY = 0x000002ea, - PH_PERF_SEL_SC6_PA0_FIFO_FULL = 0x000002eb, - PH_PERF_SEL_SC6_PA0_NULL_WE = 0x000002ec, - PH_PERF_SEL_SC6_PA0_EVENT_WE = 0x000002ed, - PH_PERF_SEL_SC6_PA0_FPOV_WE = 0x000002ee, - PH_PERF_SEL_SC6_PA0_EOP_WE = 0x000002f0, - PH_PERF_SEL_SC6_PA0_DATA_FIFO_EOP_RD = 0x000002f1, - PH_PERF_SEL_SC6_PA0_EOPG_WE = 0x000002f2, - PH_PERF_SEL_SC6_PA1_DATA_FIFO_RD = 0x000002f4, - PH_PERF_SEL_SC6_PA1_DATA_FIFO_WE = 0x000002f5, - PH_PERF_SEL_SC6_PA1_FIFO_EMPTY = 0x000002f6, - PH_PERF_SEL_SC6_PA1_FIFO_FULL = 0x000002f7, - PH_PERF_SEL_SC6_PA1_NULL_WE = 0x000002f8, - PH_PERF_SEL_SC6_PA1_EVENT_WE = 0x000002f9, - PH_PERF_SEL_SC6_PA1_FPOV_WE = 0x000002fa, - PH_PERF_SEL_SC6_PA1_EOP_WE = 0x000002fc, - PH_PERF_SEL_SC6_PA1_DATA_FIFO_EOP_RD = 0x000002fd, - PH_PERF_SEL_SC6_PA1_EOPG_WE = 0x000002fe, - PH_PERF_SEL_SC6_PA2_DATA_FIFO_RD = 0x00000300, - PH_PERF_SEL_SC6_PA2_DATA_FIFO_WE = 0x00000301, - PH_PERF_SEL_SC6_PA2_FIFO_EMPTY = 0x00000302, - PH_PERF_SEL_SC6_PA2_FIFO_FULL = 0x00000303, - PH_PERF_SEL_SC6_PA2_NULL_WE = 0x00000304, - PH_PERF_SEL_SC6_PA2_EVENT_WE = 0x00000305, - PH_PERF_SEL_SC6_PA2_FPOV_WE = 0x00000306, - PH_PERF_SEL_SC6_PA2_EOP_WE = 0x00000308, - PH_PERF_SEL_SC6_PA2_DATA_FIFO_EOP_RD = 0x00000309, - PH_PERF_SEL_SC6_PA2_EOPG_WE = 
0x0000030a, - PH_PERF_SEL_SC6_PA3_DATA_FIFO_RD = 0x0000030c, - PH_PERF_SEL_SC6_PA3_DATA_FIFO_WE = 0x0000030d, - PH_PERF_SEL_SC6_PA3_FIFO_EMPTY = 0x0000030e, - PH_PERF_SEL_SC6_PA3_FIFO_FULL = 0x0000030f, - PH_PERF_SEL_SC6_PA3_NULL_WE = 0x00000310, - PH_PERF_SEL_SC6_PA3_EVENT_WE = 0x00000311, - PH_PERF_SEL_SC6_PA3_FPOV_WE = 0x00000312, - PH_PERF_SEL_SC6_PA3_EOP_WE = 0x00000314, - PH_PERF_SEL_SC6_PA3_DATA_FIFO_EOP_RD = 0x00000315, - PH_PERF_SEL_SC6_PA3_EOPG_WE = 0x00000316, - PH_PERF_SEL_SC6_PA4_DATA_FIFO_RD = 0x00000318, - PH_PERF_SEL_SC6_PA4_DATA_FIFO_WE = 0x00000319, - PH_PERF_SEL_SC6_PA4_FIFO_EMPTY = 0x0000031a, - PH_PERF_SEL_SC6_PA4_FIFO_FULL = 0x0000031b, - PH_PERF_SEL_SC6_PA4_NULL_WE = 0x0000031c, - PH_PERF_SEL_SC6_PA4_EVENT_WE = 0x0000031d, - PH_PERF_SEL_SC6_PA4_FPOV_WE = 0x0000031e, - PH_PERF_SEL_SC6_PA4_EOP_WE = 0x00000320, - PH_PERF_SEL_SC6_PA4_DATA_FIFO_EOP_RD = 0x00000321, - PH_PERF_SEL_SC6_PA4_EOPG_WE = 0x00000322, - PH_PERF_SEL_SC6_PA5_DATA_FIFO_RD = 0x00000324, - PH_PERF_SEL_SC6_PA5_DATA_FIFO_WE = 0x00000325, - PH_PERF_SEL_SC6_PA5_FIFO_EMPTY = 0x00000326, - PH_PERF_SEL_SC6_PA5_FIFO_FULL = 0x00000327, - PH_PERF_SEL_SC6_PA5_NULL_WE = 0x00000328, - PH_PERF_SEL_SC6_PA5_EVENT_WE = 0x00000329, - PH_PERF_SEL_SC6_PA5_FPOV_WE = 0x0000032a, - PH_PERF_SEL_SC6_PA5_EOP_WE = 0x0000032c, - PH_PERF_SEL_SC6_PA5_DATA_FIFO_EOP_RD = 0x0000032d, - PH_PERF_SEL_SC6_PA5_EOPG_WE = 0x0000032e, - PH_PERF_SEL_SC6_PA6_DATA_FIFO_RD = 0x00000330, - PH_PERF_SEL_SC6_PA6_DATA_FIFO_WE = 0x00000331, - PH_PERF_SEL_SC6_PA6_FIFO_EMPTY = 0x00000332, - PH_PERF_SEL_SC6_PA6_FIFO_FULL = 0x00000333, - PH_PERF_SEL_SC6_PA6_NULL_WE = 0x00000334, - PH_PERF_SEL_SC6_PA6_EVENT_WE = 0x00000335, - PH_PERF_SEL_SC6_PA6_FPOV_WE = 0x00000336, - PH_PERF_SEL_SC6_PA6_EOP_WE = 0x00000338, - PH_PERF_SEL_SC6_PA6_DATA_FIFO_EOP_RD = 0x00000339, - PH_PERF_SEL_SC6_PA6_EOPG_WE = 0x0000033a, - PH_PERF_SEL_SC6_PA7_DATA_FIFO_RD = 0x0000033c, - PH_PERF_SEL_SC6_PA7_DATA_FIFO_WE = 0x0000033d, - PH_PERF_SEL_SC6_PA7_FIFO_EMPTY 
= 0x0000033e, - PH_PERF_SEL_SC6_PA7_FIFO_FULL = 0x0000033f, - PH_PERF_SEL_SC6_PA7_NULL_WE = 0x00000340, - PH_PERF_SEL_SC6_PA7_EVENT_WE = 0x00000341, - PH_PERF_SEL_SC6_PA7_FPOV_WE = 0x00000342, - PH_PERF_SEL_SC6_PA7_EOP_WE = 0x00000344, - PH_PERF_SEL_SC6_PA7_DATA_FIFO_EOP_RD = 0x00000345, - PH_PERF_SEL_SC6_PA7_EOPG_WE = 0x00000346, - PH_PERF_SEL_SC7_SRPS_WINDOW_VALID = 0x00000348, - PH_PERF_SEL_SC7_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000349, - PH_PERF_SEL_SC7_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000034a, - PH_PERF_SEL_SC7_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000034b, - PH_PERF_SEL_SC7_ARB_STALLED_FROM_BELOW = 0x0000034c, - PH_PERF_SEL_SC7_ARB_STARVED_FROM_ABOVE = 0x0000034d, - PH_PERF_SEL_SC7_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY = 0x0000034e, - PH_PERF_SEL_SC7_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL = 0x0000034f, - PH_PERF_SEL_SC7_ARB_BUSY = 0x00000350, - PH_PERF_SEL_SC7_ARB_PA_BUSY_SOP = 0x00000351, - PH_PERF_SEL_SC7_ARB_EOP_POP_SYNC_POP = 0x00000352, - PH_PERF_SEL_SC7_ARB_EVENT_SYNC_POP = 0x00000353, - PH_PERF_SEL_SC7_PS_ENG_MULTICYCLE_BUBBLE = 0x00000354, - PH_PERF_SEL_SC7_EOP_SYNC_WINDOW = 0x00000355, - PH_PERF_SEL_SC7_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000356, - PH_PERF_SEL_SC7_BUSY_CNT_NOT_ZERO = 0x00000357, - PH_PERF_SEL_SC7_SEND = 0x00000358, - PH_PERF_SEL_SC7_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x00000359, - PH_PERF_SEL_SC7_CREDIT_AT_MAX = 0x0000035a, - PH_PERF_SEL_SC7_CREDIT_AT_MAX_NO_PENDING_SEND = 0x0000035b, - PH_PERF_SEL_SC7_PA0_DATA_FIFO_RD = 0x00000360, - PH_PERF_SEL_SC7_PA0_DATA_FIFO_WE = 0x00000361, - PH_PERF_SEL_SC7_PA0_FIFO_EMPTY = 0x00000362, - PH_PERF_SEL_SC7_PA0_FIFO_FULL = 0x00000363, - PH_PERF_SEL_SC7_PA0_NULL_WE = 0x00000364, - PH_PERF_SEL_SC7_PA0_EVENT_WE = 0x00000365, - PH_PERF_SEL_SC7_PA0_FPOV_WE = 0x00000366, - PH_PERF_SEL_SC7_PA0_EOP_WE = 0x00000368, - PH_PERF_SEL_SC7_PA0_DATA_FIFO_EOP_RD = 0x00000369, - PH_PERF_SEL_SC7_PA0_EOPG_WE = 0x0000036a, - PH_PERF_SEL_SC7_PA1_DATA_FIFO_RD = 0x0000036c, - 
PH_PERF_SEL_SC7_PA1_DATA_FIFO_WE = 0x0000036d, - PH_PERF_SEL_SC7_PA1_FIFO_EMPTY = 0x0000036e, - PH_PERF_SEL_SC7_PA1_FIFO_FULL = 0x0000036f, - PH_PERF_SEL_SC7_PA1_NULL_WE = 0x00000370, - PH_PERF_SEL_SC7_PA1_EVENT_WE = 0x00000371, - PH_PERF_SEL_SC7_PA1_FPOV_WE = 0x00000372, - PH_PERF_SEL_SC7_PA1_EOP_WE = 0x00000374, - PH_PERF_SEL_SC7_PA1_DATA_FIFO_EOP_RD = 0x00000375, - PH_PERF_SEL_SC7_PA1_EOPG_WE = 0x00000376, - PH_PERF_SEL_SC7_PA2_DATA_FIFO_RD = 0x00000378, - PH_PERF_SEL_SC7_PA2_DATA_FIFO_WE = 0x00000379, - PH_PERF_SEL_SC7_PA2_FIFO_EMPTY = 0x0000037a, - PH_PERF_SEL_SC7_PA2_FIFO_FULL = 0x0000037b, - PH_PERF_SEL_SC7_PA2_NULL_WE = 0x0000037c, - PH_PERF_SEL_SC7_PA2_EVENT_WE = 0x0000037d, - PH_PERF_SEL_SC7_PA2_FPOV_WE = 0x0000037e, - PH_PERF_SEL_SC7_PA2_EOP_WE = 0x00000380, - PH_PERF_SEL_SC7_PA2_DATA_FIFO_EOP_RD = 0x00000381, - PH_PERF_SEL_SC7_PA2_EOPG_WE = 0x00000382, - PH_PERF_SEL_SC7_PA3_DATA_FIFO_RD = 0x00000384, - PH_PERF_SEL_SC7_PA3_DATA_FIFO_WE = 0x00000385, - PH_PERF_SEL_SC7_PA3_FIFO_EMPTY = 0x00000386, - PH_PERF_SEL_SC7_PA3_FIFO_FULL = 0x00000387, - PH_PERF_SEL_SC7_PA3_NULL_WE = 0x00000388, - PH_PERF_SEL_SC7_PA3_EVENT_WE = 0x00000389, - PH_PERF_SEL_SC7_PA3_FPOV_WE = 0x0000038a, - PH_PERF_SEL_SC7_PA3_EOP_WE = 0x0000038c, - PH_PERF_SEL_SC7_PA3_DATA_FIFO_EOP_RD = 0x0000038d, - PH_PERF_SEL_SC7_PA3_EOPG_WE = 0x0000038e, - PH_PERF_SEL_SC7_PA4_DATA_FIFO_RD = 0x00000390, - PH_PERF_SEL_SC7_PA4_DATA_FIFO_WE = 0x00000391, - PH_PERF_SEL_SC7_PA4_FIFO_EMPTY = 0x00000392, - PH_PERF_SEL_SC7_PA4_FIFO_FULL = 0x00000393, - PH_PERF_SEL_SC7_PA4_NULL_WE = 0x00000394, - PH_PERF_SEL_SC7_PA4_EVENT_WE = 0x00000395, - PH_PERF_SEL_SC7_PA4_FPOV_WE = 0x00000396, - PH_PERF_SEL_SC7_PA4_EOP_WE = 0x00000398, - PH_PERF_SEL_SC7_PA4_DATA_FIFO_EOP_RD = 0x00000399, - PH_PERF_SEL_SC7_PA4_EOPG_WE = 0x0000039a, - PH_PERF_SEL_SC7_PA5_DATA_FIFO_RD = 0x0000039c, - PH_PERF_SEL_SC7_PA5_DATA_FIFO_WE = 0x0000039d, - PH_PERF_SEL_SC7_PA5_FIFO_EMPTY = 0x0000039e, - PH_PERF_SEL_SC7_PA5_FIFO_FULL = 0x0000039f, - 
PH_PERF_SEL_SC7_PA5_NULL_WE = 0x000003a0, - PH_PERF_SEL_SC7_PA5_EVENT_WE = 0x000003a1, - PH_PERF_SEL_SC7_PA5_FPOV_WE = 0x000003a2, - PH_PERF_SEL_SC7_PA5_EOP_WE = 0x000003a4, - PH_PERF_SEL_SC7_PA5_DATA_FIFO_EOP_RD = 0x000003a5, - PH_PERF_SEL_SC7_PA5_EOPG_WE = 0x000003a6, - PH_PERF_SEL_SC7_PA6_DATA_FIFO_RD = 0x000003a8, - PH_PERF_SEL_SC7_PA6_DATA_FIFO_WE = 0x000003a9, - PH_PERF_SEL_SC7_PA6_FIFO_EMPTY = 0x000003aa, - PH_PERF_SEL_SC7_PA6_FIFO_FULL = 0x000003ab, - PH_PERF_SEL_SC7_PA6_NULL_WE = 0x000003ac, - PH_PERF_SEL_SC7_PA6_EVENT_WE = 0x000003ad, - PH_PERF_SEL_SC7_PA6_FPOV_WE = 0x000003ae, - PH_PERF_SEL_SC7_PA6_EOP_WE = 0x000003b0, - PH_PERF_SEL_SC7_PA6_DATA_FIFO_EOP_RD = 0x000003b1, - PH_PERF_SEL_SC7_PA6_EOPG_WE = 0x000003b2, - PH_PERF_SEL_SC7_PA7_DATA_FIFO_RD = 0x000003b4, - PH_PERF_SEL_SC7_PA7_DATA_FIFO_WE = 0x000003b5, - PH_PERF_SEL_SC7_PA7_FIFO_EMPTY = 0x000003b6, - PH_PERF_SEL_SC7_PA7_FIFO_FULL = 0x000003b7, - PH_PERF_SEL_SC7_PA7_NULL_WE = 0x000003b8, - PH_PERF_SEL_SC7_PA7_EVENT_WE = 0x000003b9, - PH_PERF_SEL_SC7_PA7_FPOV_WE = 0x000003ba, - PH_PERF_SEL_SC7_PA7_EOP_WE = 0x000003bc, - PH_PERF_SEL_SC7_PA7_DATA_FIFO_EOP_RD = 0x000003bd, - PH_PERF_SEL_SC7_PA7_EOPG_WE = 0x000003be, - PH_PERF_SEL_SC0_PA0_LPOV_WE__GFX10 = 0x0000001f, - PH_PERF_SEL_SC0_PA0_DEALLOC_4_0_RD__GFX10 = 0x00000023, - PH_PERF_SEL_SC0_PA1_LPOV_WE__GFX10 = 0x0000002b, - PH_PERF_SEL_SC0_PA1_DEALLOC_4_0_RD__GFX10 = 0x0000002f, - PH_PERF_SEL_SC0_PA2_LPOV_WE__GFX10 = 0x00000037, - PH_PERF_SEL_SC0_PA2_DEALLOC_4_0_RD__GFX10 = 0x0000003b, - PH_PERF_SEL_SC0_PA3_LPOV_WE__GFX10 = 0x00000043, - PH_PERF_SEL_SC0_PA3_DEALLOC_4_0_RD__GFX10 = 0x00000047, - PH_PERF_SEL_SC0_PA4_LPOV_WE__GFX10 = 0x0000004f, - PH_PERF_SEL_SC0_PA4_DEALLOC_4_0_RD__GFX10 = 0x00000053, - PH_PERF_SEL_SC0_PA5_LPOV_WE__GFX10 = 0x0000005b, - PH_PERF_SEL_SC0_PA5_DEALLOC_4_0_RD__GFX10 = 0x0000005f, - PH_PERF_SEL_SC0_PA6_LPOV_WE__GFX10 = 0x00000067, - PH_PERF_SEL_SC0_PA6_DEALLOC_4_0_RD__GFX10 = 0x0000006b, - PH_PERF_SEL_SC0_PA7_LPOV_WE__GFX10 
= 0x00000073, - PH_PERF_SEL_SC0_PA7_DEALLOC_4_0_RD__GFX10 = 0x00000077, - PH_PERF_SEL_SC1_PA0_LPOV_WE__GFX10 = 0x00000097, - PH_PERF_SEL_SC1_PA0_DEALLOC_4_0_RD__GFX10 = 0x0000009b, - PH_PERF_SEL_SC1_PA1_LPOV_WE__GFX10 = 0x000000a3, - PH_PERF_SEL_SC1_PA1_DEALLOC_4_0_RD__GFX10 = 0x000000a7, - PH_PERF_SEL_SC1_PA2_LPOV_WE__GFX10 = 0x000000af, - PH_PERF_SEL_SC1_PA2_DEALLOC_4_0_RD__GFX10 = 0x000000b3, - PH_PERF_SEL_SC1_PA3_LPOV_WE__GFX10 = 0x000000bb, - PH_PERF_SEL_SC1_PA3_DEALLOC_4_0_RD__GFX10 = 0x000000bf, - PH_PERF_SEL_SC1_PA4_LPOV_WE__GFX10 = 0x000000c7, - PH_PERF_SEL_SC1_PA4_DEALLOC_4_0_RD__GFX10 = 0x000000cb, - PH_PERF_SEL_SC1_PA5_LPOV_WE__GFX10 = 0x000000d3, - PH_PERF_SEL_SC1_PA5_DEALLOC_4_0_RD__GFX10 = 0x000000d7, - PH_PERF_SEL_SC1_PA6_LPOV_WE__GFX10 = 0x000000df, - PH_PERF_SEL_SC1_PA6_DEALLOC_4_0_RD__GFX10 = 0x000000e3, - PH_PERF_SEL_SC1_PA7_LPOV_WE__GFX10 = 0x000000eb, - PH_PERF_SEL_SC1_PA7_DEALLOC_4_0_RD__GFX10 = 0x000000ef, - PH_PERF_SEL_SC2_PA0_LPOV_WE__GFX10 = 0x0000010f, - PH_PERF_SEL_SC2_PA0_DEALLOC_4_0_RD__GFX10 = 0x00000113, - PH_PERF_SEL_SC2_PA1_LPOV_WE__GFX10 = 0x0000011b, - PH_PERF_SEL_SC2_PA1_DEALLOC_4_0_RD__GFX10 = 0x0000011f, - PH_PERF_SEL_SC2_PA2_LPOV_WE__GFX10 = 0x00000127, - PH_PERF_SEL_SC2_PA2_DEALLOC_4_0_RD__GFX10 = 0x0000012b, - PH_PERF_SEL_SC2_PA3_LPOV_WE__GFX10 = 0x00000133, - PH_PERF_SEL_SC2_PA3_DEALLOC_4_0_RD__GFX10 = 0x00000137, - PH_PERF_SEL_SC2_PA4_LPOV_WE__GFX10 = 0x0000013f, - PH_PERF_SEL_SC2_PA4_DEALLOC_4_0_RD__GFX10 = 0x00000143, - PH_PERF_SEL_SC2_PA5_LPOV_WE__GFX10 = 0x0000014b, - PH_PERF_SEL_SC2_PA5_DEALLOC_4_0_RD__GFX10 = 0x0000014f, - PH_PERF_SEL_SC2_PA6_LPOV_WE__GFX10 = 0x00000157, - PH_PERF_SEL_SC2_PA6_DEALLOC_4_0_RD__GFX10 = 0x0000015b, - PH_PERF_SEL_SC2_PA7_LPOV_WE__GFX10 = 0x00000163, - PH_PERF_SEL_SC2_PA7_DEALLOC_4_0_RD__GFX10 = 0x00000167, - PH_PERF_SEL_SC3_PA0_LPOV_WE__GFX10 = 0x00000187, - PH_PERF_SEL_SC3_PA0_DEALLOC_4_0_RD__GFX10 = 0x0000018b, - PH_PERF_SEL_SC3_PA1_LPOV_WE__GFX10 = 0x00000193, - 
PH_PERF_SEL_SC3_PA1_DEALLOC_4_0_RD__GFX10 = 0x00000197, - PH_PERF_SEL_SC3_PA2_LPOV_WE__GFX10 = 0x0000019f, - PH_PERF_SEL_SC3_PA2_DEALLOC_4_0_RD__GFX10 = 0x000001a3, - PH_PERF_SEL_SC3_PA3_LPOV_WE__GFX10 = 0x000001ab, - PH_PERF_SEL_SC3_PA3_DEALLOC_4_0_RD__GFX10 = 0x000001af, - PH_PERF_SEL_SC3_PA4_LPOV_WE__GFX10 = 0x000001b7, - PH_PERF_SEL_SC3_PA4_DEALLOC_4_0_RD__GFX10 = 0x000001bb, - PH_PERF_SEL_SC3_PA5_LPOV_WE__GFX10 = 0x000001c3, - PH_PERF_SEL_SC3_PA5_DEALLOC_4_0_RD__GFX10 = 0x000001c7, - PH_PERF_SEL_SC3_PA6_LPOV_WE__GFX10 = 0x000001cf, - PH_PERF_SEL_SC3_PA6_DEALLOC_4_0_RD__GFX10 = 0x000001d3, - PH_PERF_SEL_SC3_PA7_LPOV_WE__GFX10 = 0x000001db, - PH_PERF_SEL_SC3_PA7_DEALLOC_4_0_RD__GFX10 = 0x000001df, - PH_PERF_SEL_SC4_PA0_LPOV_WE__GFX10 = 0x000001ff, - PH_PERF_SEL_SC4_PA0_DEALLOC_4_0_RD__GFX10 = 0x00000203, - PH_PERF_SEL_SC4_PA1_LPOV_WE__GFX10 = 0x0000020b, - PH_PERF_SEL_SC4_PA1_DEALLOC_4_0_RD__GFX10 = 0x0000020f, - PH_PERF_SEL_SC4_PA2_LPOV_WE__GFX10 = 0x00000217, - PH_PERF_SEL_SC4_PA2_DEALLOC_4_0_RD__GFX10 = 0x0000021b, - PH_PERF_SEL_SC4_PA3_LPOV_WE__GFX10 = 0x00000223, - PH_PERF_SEL_SC4_PA3_DEALLOC_4_0_RD__GFX10 = 0x00000227, - PH_PERF_SEL_SC4_PA4_LPOV_WE__GFX10 = 0x0000022f, - PH_PERF_SEL_SC4_PA4_DEALLOC_4_0_RD__GFX10 = 0x00000233, - PH_PERF_SEL_SC4_PA5_LPOV_WE__GFX10 = 0x0000023b, - PH_PERF_SEL_SC4_PA5_DEALLOC_4_0_RD__GFX10 = 0x0000023f, - PH_PERF_SEL_SC4_PA6_LPOV_WE__GFX10 = 0x00000247, - PH_PERF_SEL_SC4_PA6_DEALLOC_4_0_RD__GFX10 = 0x0000024b, - PH_PERF_SEL_SC4_PA7_LPOV_WE__GFX10 = 0x00000253, - PH_PERF_SEL_SC4_PA7_DEALLOC_4_0_RD__GFX10 = 0x00000257, - PH_PERF_SEL_SC5_PA0_LPOV_WE__GFX10 = 0x00000277, - PH_PERF_SEL_SC5_PA0_DEALLOC_4_0_RD__GFX10 = 0x0000027b, - PH_PERF_SEL_SC5_PA1_LPOV_WE__GFX10 = 0x00000283, - PH_PERF_SEL_SC5_PA1_DEALLOC_4_0_RD__GFX10 = 0x00000287, - PH_PERF_SEL_SC5_PA2_LPOV_WE__GFX10 = 0x0000028f, - PH_PERF_SEL_SC5_PA2_DEALLOC_4_0_RD__GFX10 = 0x00000293, - PH_PERF_SEL_SC5_PA3_LPOV_WE__GFX10 = 0x0000029b, - 
PH_PERF_SEL_SC5_PA3_DEALLOC_4_0_RD__GFX10 = 0x0000029f, - PH_PERF_SEL_SC5_PA4_LPOV_WE__GFX10 = 0x000002a7, - PH_PERF_SEL_SC5_PA4_DEALLOC_4_0_RD__GFX10 = 0x000002ab, - PH_PERF_SEL_SC5_PA5_LPOV_WE__GFX10 = 0x000002b3, - PH_PERF_SEL_SC5_PA5_DEALLOC_4_0_RD__GFX10 = 0x000002b7, - PH_PERF_SEL_SC5_PA6_LPOV_WE__GFX10 = 0x000002bf, - PH_PERF_SEL_SC5_PA6_DEALLOC_4_0_RD__GFX10 = 0x000002c3, - PH_PERF_SEL_SC5_PA7_LPOV_WE__GFX10 = 0x000002cb, - PH_PERF_SEL_SC5_PA7_DEALLOC_4_0_RD__GFX10 = 0x000002cf, - PH_PERF_SEL_SC6_PA0_LPOV_WE__GFX10 = 0x000002ef, - PH_PERF_SEL_SC6_PA0_DEALLOC_4_0_RD__GFX10 = 0x000002f3, - PH_PERF_SEL_SC6_PA1_LPOV_WE__GFX10 = 0x000002fb, - PH_PERF_SEL_SC6_PA1_DEALLOC_4_0_RD__GFX10 = 0x000002ff, - PH_PERF_SEL_SC6_PA2_LPOV_WE__GFX10 = 0x00000307, - PH_PERF_SEL_SC6_PA2_DEALLOC_4_0_RD__GFX10 = 0x0000030b, - PH_PERF_SEL_SC6_PA3_LPOV_WE__GFX10 = 0x00000313, - PH_PERF_SEL_SC6_PA3_DEALLOC_4_0_RD__GFX10 = 0x00000317, - PH_PERF_SEL_SC6_PA4_LPOV_WE__GFX10 = 0x0000031f, - PH_PERF_SEL_SC6_PA4_DEALLOC_4_0_RD__GFX10 = 0x00000323, - PH_PERF_SEL_SC6_PA5_LPOV_WE__GFX10 = 0x0000032b, - PH_PERF_SEL_SC6_PA5_DEALLOC_4_0_RD__GFX10 = 0x0000032f, - PH_PERF_SEL_SC6_PA6_LPOV_WE__GFX10 = 0x00000337, - PH_PERF_SEL_SC6_PA6_DEALLOC_4_0_RD__GFX10 = 0x0000033b, - PH_PERF_SEL_SC6_PA7_LPOV_WE__GFX10 = 0x00000343, - PH_PERF_SEL_SC6_PA7_DEALLOC_4_0_RD__GFX10 = 0x00000347, - PH_PERF_SEL_SC7_PA0_LPOV_WE__GFX10 = 0x00000367, - PH_PERF_SEL_SC7_PA0_DEALLOC_4_0_RD__GFX10 = 0x0000036b, - PH_PERF_SEL_SC7_PA1_LPOV_WE__GFX10 = 0x00000373, - PH_PERF_SEL_SC7_PA1_DEALLOC_4_0_RD__GFX10 = 0x00000377, - PH_PERF_SEL_SC7_PA2_LPOV_WE__GFX10 = 0x0000037f, - PH_PERF_SEL_SC7_PA2_DEALLOC_4_0_RD__GFX10 = 0x00000383, - PH_PERF_SEL_SC7_PA3_LPOV_WE__GFX10 = 0x0000038b, - PH_PERF_SEL_SC7_PA3_DEALLOC_4_0_RD__GFX10 = 0x0000038f, - PH_PERF_SEL_SC7_PA4_LPOV_WE__GFX10 = 0x00000397, - PH_PERF_SEL_SC7_PA4_DEALLOC_4_0_RD__GFX10 = 0x0000039b, - PH_PERF_SEL_SC7_PA5_LPOV_WE__GFX10 = 0x000003a3, - 
PH_PERF_SEL_SC7_PA5_DEALLOC_4_0_RD__GFX10 = 0x000003a7, - PH_PERF_SEL_SC7_PA6_LPOV_WE__GFX10 = 0x000003af, - PH_PERF_SEL_SC7_PA6_DEALLOC_4_0_RD__GFX10 = 0x000003b3, - PH_PERF_SEL_SC7_PA7_LPOV_WE__GFX10 = 0x000003bb, - PH_PERF_SEL_SC7_PA7_DEALLOC_4_0_RD__GFX10 = 0x000003bf, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__GFX101 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x0000026f, - 
PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX101 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX101 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__GFX101 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__GFX101 = 0x0000035f, - PH_PERF_SEL_1_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c0, - PH_PERF_SEL_2_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c1, - PH_PERF_SEL_3_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c2, - PH_PERF_SEL_4_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c3, - PH_PERF_SEL_5_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c4, - PH_PERF_SEL_6_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c5, - PH_PERF_SEL_7_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c6, - PH_PERF_SEL_8_SC_ARB_STALLED_FROM_BELOW__GFX103PLUSEXCLUSIVE = 0x000003c7, - PH_PERF_SEL_1_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003c8, - PH_PERF_SEL_2_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003c9, - PH_PERF_SEL_3_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003ca, - PH_PERF_SEL_4_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003cb, - PH_PERF_SEL_5_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003cc, - PH_PERF_SEL_6_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003cd, - PH_PERF_SEL_7_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003ce, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE__GFX103PLUSEXCLUSIVE = 0x000003cf, - PH_PERF_SEL_1_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d0, - 
PH_PERF_SEL_2_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d1, - PH_PERF_SEL_3_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d2, - PH_PERF_SEL_4_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d3, - PH_PERF_SEL_5_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d4, - PH_PERF_SEL_6_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d5, - PH_PERF_SEL_7_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d6, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_NOT_EMPTY__GFX103PLUSEXCLUSIVE = 0x000003d7, - PH_PERF_SEL_1_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003d8, - PH_PERF_SEL_2_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003d9, - PH_PERF_SEL_3_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003da, - PH_PERF_SEL_4_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003db, - PH_PERF_SEL_5_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003dc, - PH_PERF_SEL_6_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003dd, - PH_PERF_SEL_7_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003de, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000003df, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__GFX11 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x00000017, - PH_PERF_SEL_SC0_PA0_FPOP_WE__GFX11 = 0x0000001f, - 
PH_PERF_SEL_SC0_PA0_DEALLOC_WE__GFX11 = 0x00000023, - PH_PERF_SEL_SC0_PA1_FPOP_WE__GFX11 = 0x0000002b, - PH_PERF_SEL_SC0_PA1_DEALLOC_WE__GFX11 = 0x0000002f, - PH_PERF_SEL_SC0_PA2_FPOP_WE__GFX11 = 0x00000037, - PH_PERF_SEL_SC0_PA2_DEALLOC_WE__GFX11 = 0x0000003b, - PH_PERF_SEL_SC0_PA3_FPOP_WE__GFX11 = 0x00000043, - PH_PERF_SEL_SC0_PA3_DEALLOC_WE__GFX11 = 0x00000047, - PH_PERF_SEL_SC0_PA4_FPOP_WE__GFX11 = 0x0000004f, - PH_PERF_SEL_SC0_PA4_DEALLOC_WE__GFX11 = 0x00000053, - PH_PERF_SEL_SC0_PA5_FPOP_WE__GFX11 = 0x0000005b, - PH_PERF_SEL_SC0_PA5_DEALLOC_WE__GFX11 = 0x0000005f, - PH_PERF_SEL_SC0_PA6_FPOP_WE__GFX11 = 0x00000067, - PH_PERF_SEL_SC0_PA6_DEALLOC_WE__GFX11 = 0x0000006b, - PH_PERF_SEL_SC0_PA7_FPOP_WE__GFX11 = 0x00000073, - PH_PERF_SEL_SC0_PA7_DEALLOC_WE__GFX11 = 0x00000077, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x0000008f, - PH_PERF_SEL_SC1_PA0_FPOP_WE__GFX11 = 0x00000097, - PH_PERF_SEL_SC1_PA0_DEALLOC_WE__GFX11 = 0x0000009b, - PH_PERF_SEL_SC1_PA1_FPOP_WE__GFX11 = 0x000000a3, - PH_PERF_SEL_SC1_PA1_DEALLOC_WE__GFX11 = 0x000000a7, - PH_PERF_SEL_SC1_PA2_FPOP_WE__GFX11 = 0x000000af, - PH_PERF_SEL_SC1_PA2_DEALLOC_WE__GFX11 = 0x000000b3, - PH_PERF_SEL_SC1_PA3_FPOP_WE__GFX11 = 0x000000bb, - PH_PERF_SEL_SC1_PA3_DEALLOC_WE__GFX11 = 0x000000bf, - PH_PERF_SEL_SC1_PA4_FPOP_WE__GFX11 = 0x000000c7, - PH_PERF_SEL_SC1_PA4_DEALLOC_WE__GFX11 = 0x000000cb, - PH_PERF_SEL_SC1_PA5_FPOP_WE__GFX11 = 0x000000d3, - PH_PERF_SEL_SC1_PA5_DEALLOC_WE__GFX11 = 0x000000d7, - PH_PERF_SEL_SC1_PA6_FPOP_WE__GFX11 = 0x000000df, - PH_PERF_SEL_SC1_PA6_DEALLOC_WE__GFX11 = 0x000000e3, - PH_PERF_SEL_SC1_PA7_FPOP_WE__GFX11 = 0x000000eb, - PH_PERF_SEL_SC1_PA7_DEALLOC_WE__GFX11 = 0x000000ef, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x00000104, - 
PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x00000107, - PH_PERF_SEL_SC2_PA0_FPOP_WE__GFX11 = 0x0000010f, - PH_PERF_SEL_SC2_PA0_DEALLOC_WE__GFX11 = 0x00000113, - PH_PERF_SEL_SC2_PA1_FPOP_WE__GFX11 = 0x0000011b, - PH_PERF_SEL_SC2_PA1_DEALLOC_WE__GFX11 = 0x0000011f, - PH_PERF_SEL_SC2_PA2_FPOP_WE__GFX11 = 0x00000127, - PH_PERF_SEL_SC2_PA2_DEALLOC_WE__GFX11 = 0x0000012b, - PH_PERF_SEL_SC2_PA3_FPOP_WE__GFX11 = 0x00000133, - PH_PERF_SEL_SC2_PA3_DEALLOC_WE__GFX11 = 0x00000137, - PH_PERF_SEL_SC2_PA4_FPOP_WE__GFX11 = 0x0000013f, - PH_PERF_SEL_SC2_PA4_DEALLOC_WE__GFX11 = 0x00000143, - PH_PERF_SEL_SC2_PA5_FPOP_WE__GFX11 = 0x0000014b, - PH_PERF_SEL_SC2_PA5_DEALLOC_WE__GFX11 = 0x0000014f, - PH_PERF_SEL_SC2_PA6_FPOP_WE__GFX11 = 0x00000157, - PH_PERF_SEL_SC2_PA6_DEALLOC_WE__GFX11 = 0x0000015b, - PH_PERF_SEL_SC2_PA7_FPOP_WE__GFX11 = 0x00000163, - PH_PERF_SEL_SC2_PA7_DEALLOC_WE__GFX11 = 0x00000167, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x0000017f, - PH_PERF_SEL_SC3_PA0_FPOP_WE__GFX11 = 0x00000187, - PH_PERF_SEL_SC3_PA0_DEALLOC_WE__GFX11 = 0x0000018b, - PH_PERF_SEL_SC3_PA1_FPOP_WE__GFX11 = 0x00000193, - PH_PERF_SEL_SC3_PA1_DEALLOC_WE__GFX11 = 0x00000197, - PH_PERF_SEL_SC3_PA2_FPOP_WE__GFX11 = 0x0000019f, - PH_PERF_SEL_SC3_PA2_DEALLOC_WE__GFX11 = 0x000001a3, - PH_PERF_SEL_SC3_PA3_FPOP_WE__GFX11 = 0x000001ab, - PH_PERF_SEL_SC3_PA3_DEALLOC_WE__GFX11 = 0x000001af, - PH_PERF_SEL_SC3_PA4_FPOP_WE__GFX11 = 0x000001b7, - PH_PERF_SEL_SC3_PA4_DEALLOC_WE__GFX11 = 0x000001bb, - PH_PERF_SEL_SC3_PA5_FPOP_WE__GFX11 = 0x000001c3, - PH_PERF_SEL_SC3_PA5_DEALLOC_WE__GFX11 = 0x000001c7, - 
PH_PERF_SEL_SC3_PA6_FPOP_WE__GFX11 = 0x000001cf, - PH_PERF_SEL_SC3_PA6_DEALLOC_WE__GFX11 = 0x000001d3, - PH_PERF_SEL_SC3_PA7_FPOP_WE__GFX11 = 0x000001db, - PH_PERF_SEL_SC3_PA7_DEALLOC_WE__GFX11 = 0x000001df, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x000001f7, - PH_PERF_SEL_SC4_PA0_FPOP_WE__GFX11 = 0x000001ff, - PH_PERF_SEL_SC4_PA0_DEALLOC_WE__GFX11 = 0x00000203, - PH_PERF_SEL_SC4_PA1_FPOP_WE__GFX11 = 0x0000020b, - PH_PERF_SEL_SC4_PA1_DEALLOC_WE__GFX11 = 0x0000020f, - PH_PERF_SEL_SC4_PA2_FPOP_WE__GFX11 = 0x00000217, - PH_PERF_SEL_SC4_PA2_DEALLOC_WE__GFX11 = 0x0000021b, - PH_PERF_SEL_SC4_PA3_FPOP_WE__GFX11 = 0x00000223, - PH_PERF_SEL_SC4_PA3_DEALLOC_WE__GFX11 = 0x00000227, - PH_PERF_SEL_SC4_PA4_FPOP_WE__GFX11 = 0x0000022f, - PH_PERF_SEL_SC4_PA4_DEALLOC_WE__GFX11 = 0x00000233, - PH_PERF_SEL_SC4_PA5_FPOP_WE__GFX11 = 0x0000023b, - PH_PERF_SEL_SC4_PA5_DEALLOC_WE__GFX11 = 0x0000023f, - PH_PERF_SEL_SC4_PA6_FPOP_WE__GFX11 = 0x00000247, - PH_PERF_SEL_SC4_PA6_DEALLOC_WE__GFX11 = 0x0000024b, - PH_PERF_SEL_SC4_PA7_FPOP_WE__GFX11 = 0x00000253, - PH_PERF_SEL_SC4_PA7_DEALLOC_WE__GFX11 = 0x00000257, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x0000026f, - PH_PERF_SEL_SC5_PA0_FPOP_WE__GFX11 = 0x00000277, - PH_PERF_SEL_SC5_PA0_DEALLOC_WE__GFX11 = 0x0000027b, - PH_PERF_SEL_SC5_PA1_FPOP_WE__GFX11 = 0x00000283, - PH_PERF_SEL_SC5_PA1_DEALLOC_WE__GFX11 = 0x00000287, - PH_PERF_SEL_SC5_PA2_FPOP_WE__GFX11 = 0x0000028f, - PH_PERF_SEL_SC5_PA2_DEALLOC_WE__GFX11 = 0x00000293, - PH_PERF_SEL_SC5_PA3_FPOP_WE__GFX11 = 0x0000029b, - 
PH_PERF_SEL_SC5_PA3_DEALLOC_WE__GFX11 = 0x0000029f, - PH_PERF_SEL_SC5_PA4_FPOP_WE__GFX11 = 0x000002a7, - PH_PERF_SEL_SC5_PA4_DEALLOC_WE__GFX11 = 0x000002ab, - PH_PERF_SEL_SC5_PA5_FPOP_WE__GFX11 = 0x000002b3, - PH_PERF_SEL_SC5_PA5_DEALLOC_WE__GFX11 = 0x000002b7, - PH_PERF_SEL_SC5_PA6_FPOP_WE__GFX11 = 0x000002bf, - PH_PERF_SEL_SC5_PA6_DEALLOC_WE__GFX11 = 0x000002c3, - PH_PERF_SEL_SC5_PA7_FPOP_WE__GFX11 = 0x000002cb, - PH_PERF_SEL_SC5_PA7_DEALLOC_WE__GFX11 = 0x000002cf, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x000002e7, - PH_PERF_SEL_SC6_PA0_FPOP_WE__GFX11 = 0x000002ef, - PH_PERF_SEL_SC6_PA0_DEALLOC_WE__GFX11 = 0x000002f3, - PH_PERF_SEL_SC6_PA1_FPOP_WE__GFX11 = 0x000002fb, - PH_PERF_SEL_SC6_PA1_DEALLOC_WE__GFX11 = 0x000002ff, - PH_PERF_SEL_SC6_PA2_FPOP_WE__GFX11 = 0x00000307, - PH_PERF_SEL_SC6_PA2_DEALLOC_WE__GFX11 = 0x0000030b, - PH_PERF_SEL_SC6_PA3_FPOP_WE__GFX11 = 0x00000313, - PH_PERF_SEL_SC6_PA3_DEALLOC_WE__GFX11 = 0x00000317, - PH_PERF_SEL_SC6_PA4_FPOP_WE__GFX11 = 0x0000031f, - PH_PERF_SEL_SC6_PA4_DEALLOC_WE__GFX11 = 0x00000323, - PH_PERF_SEL_SC6_PA5_FPOP_WE__GFX11 = 0x0000032b, - PH_PERF_SEL_SC6_PA5_DEALLOC_WE__GFX11 = 0x0000032f, - PH_PERF_SEL_SC6_PA6_FPOP_WE__GFX11 = 0x00000337, - PH_PERF_SEL_SC6_PA6_DEALLOC_WE__GFX11 = 0x0000033b, - PH_PERF_SEL_SC6_PA7_FPOP_WE__GFX11 = 0x00000343, - PH_PERF_SEL_SC6_PA7_DEALLOC_WE__GFX11 = 0x00000347, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__GFX11 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__GFX11 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__GFX11 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__GFX11 = 0x0000035f, - PH_PERF_SEL_SC7_PA0_FPOP_WE__GFX11 = 0x00000367, - PH_PERF_SEL_SC7_PA0_DEALLOC_WE__GFX11 = 0x0000036b, - 
PH_PERF_SEL_SC7_PA1_FPOP_WE__GFX11 = 0x00000373, - PH_PERF_SEL_SC7_PA1_DEALLOC_WE__GFX11 = 0x00000377, - PH_PERF_SEL_SC7_PA2_FPOP_WE__GFX11 = 0x0000037f, - PH_PERF_SEL_SC7_PA2_DEALLOC_WE__GFX11 = 0x00000383, - PH_PERF_SEL_SC7_PA3_FPOP_WE__GFX11 = 0x0000038b, - PH_PERF_SEL_SC7_PA3_DEALLOC_WE__GFX11 = 0x0000038f, - PH_PERF_SEL_SC7_PA4_FPOP_WE__GFX11 = 0x00000397, - PH_PERF_SEL_SC7_PA4_DEALLOC_WE__GFX11 = 0x0000039b, - PH_PERF_SEL_SC7_PA5_FPOP_WE__GFX11 = 0x000003a3, - PH_PERF_SEL_SC7_PA5_DEALLOC_WE__GFX11 = 0x000003a7, - PH_PERF_SEL_SC7_PA6_FPOP_WE__GFX11 = 0x000003af, - PH_PERF_SEL_SC7_PA6_DEALLOC_WE__GFX11 = 0x000003b3, - PH_PERF_SEL_SC7_PA7_FPOP_WE__GFX11 = 0x000003bb, - PH_PERF_SEL_SC7_PA7_DEALLOC_WE__GFX11 = 0x000003bf, - PH_PERF_SC0_FIFO_STATUS_0__GFX11 = 0x000003e0, - PH_PERF_SC0_FIFO_STATUS_1__GFX11 = 0x000003e1, - PH_PERF_SC0_FIFO_STATUS_2__GFX11 = 0x000003e2, - PH_PERF_SC0_FIFO_STATUS_3__GFX11 = 0x000003e3, - PH_PERF_SC1_FIFO_STATUS_0__GFX11 = 0x000003e4, - PH_PERF_SC1_FIFO_STATUS_1__GFX11 = 0x000003e5, - PH_PERF_SC1_FIFO_STATUS_2__GFX11 = 0x000003e6, - PH_PERF_SC1_FIFO_STATUS_3__GFX11 = 0x000003e7, - PH_PERF_SC2_FIFO_STATUS_0__GFX11 = 0x000003e8, - PH_PERF_SC2_FIFO_STATUS_1__GFX11 = 0x000003e9, - PH_PERF_SC2_FIFO_STATUS_2__GFX11 = 0x000003ea, - PH_PERF_SC2_FIFO_STATUS_3__GFX11 = 0x000003eb, - PH_PERF_SC3_FIFO_STATUS_0__GFX11 = 0x000003ec, - PH_PERF_SC3_FIFO_STATUS_1__GFX11 = 0x000003ed, - PH_PERF_SC3_FIFO_STATUS_2__GFX11 = 0x000003ee, - PH_PERF_SC3_FIFO_STATUS_3__GFX11 = 0x000003ef, - PH_PERF_SC4_FIFO_STATUS_0__GFX11 = 0x000003f0, - PH_PERF_SC4_FIFO_STATUS_1__GFX11 = 0x000003f1, - PH_PERF_SC4_FIFO_STATUS_2__GFX11 = 0x000003f2, - PH_PERF_SC4_FIFO_STATUS_3__GFX11 = 0x000003f3, - PH_PERF_SC5_FIFO_STATUS_0__GFX11 = 0x000003f4, - PH_PERF_SC5_FIFO_STATUS_1__GFX11 = 0x000003f5, - PH_PERF_SC5_FIFO_STATUS_2__GFX11 = 0x000003f6, - PH_PERF_SC5_FIFO_STATUS_3__GFX11 = 0x000003f7, - PH_PERF_SC6_FIFO_STATUS_0__GFX11 = 0x000003f8, - PH_PERF_SC6_FIFO_STATUS_1__GFX11 = 
0x000003f9, - PH_PERF_SC6_FIFO_STATUS_2__GFX11 = 0x000003fa, - PH_PERF_SC6_FIFO_STATUS_3__GFX11 = 0x000003fb, - PH_PERF_SC7_FIFO_STATUS_0__GFX11 = 0x000003fc, - PH_PERF_SC7_FIFO_STATUS_1__GFX11 = 0x000003fd, - PH_PERF_SC7_FIFO_STATUS_2__GFX11 = 0x000003fe, - PH_PERF_SC7_FIFO_STATUS_3__GFX11 = 0x000003ff, -#endif -#if CHIP_HDR_NAVI21 - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__NV21 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x0000026f, - 
PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV21 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV21 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__NV21 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__NV21 = 0x0000035f, -#endif -#if CHIP_HDR_NAVI22 - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__NV22 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x000001f7, - 
PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x0000026f, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__NV22 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__NV22 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV22 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV22 = 0x0000035f, -#endif -#if CHIP_HDR_NAVI23 - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__NV23 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x0000017f, - 
PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x0000026f, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__NV23 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__NV23 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV23 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV23 = 0x0000035f, -#endif -#if CHIP_HDR_NAVI24 - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__NV24 = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x00000107, - 
PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x0000026f, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__NV24 = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__NV24 = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__NV24 = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__NV24 = 0x0000035f, -#endif - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x00000016, - PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x0000008f, - 
PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x0000026f, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__RAPHAEL = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__RAPHAEL = 0x0000035d, - PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__RAPHAEL = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__RAPHAEL = 0x0000035f, - PH_PERF_SEL_SC0_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x00000014, - PH_PERF_SEL_SC0_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x00000015, - PH_PERF_SEL_SC0_GFX_PIPE_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x00000016, - 
PH_PERF_SEL_SC0_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x00000017, - PH_PERF_SEL_SC1_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x0000008c, - PH_PERF_SEL_SC1_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x0000008d, - PH_PERF_SEL_SC1_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x0000008e, - PH_PERF_SEL_SC1_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x0000008f, - PH_PERF_SEL_SC2_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x00000104, - PH_PERF_SEL_SC2_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x00000105, - PH_PERF_SEL_SC2_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x00000106, - PH_PERF_SEL_SC2_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x00000107, - PH_PERF_SEL_SC3_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x0000017c, - PH_PERF_SEL_SC3_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x0000017d, - PH_PERF_SEL_SC3_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x0000017e, - PH_PERF_SEL_SC3_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x0000017f, - PH_PERF_SEL_SC4_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x000001f4, - PH_PERF_SEL_SC4_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x000001f5, - PH_PERF_SEL_SC4_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x000001f6, - PH_PERF_SEL_SC4_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x000001f7, - PH_PERF_SEL_SC5_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x0000026c, - PH_PERF_SEL_SC5_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x0000026d, - PH_PERF_SEL_SC5_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x0000026e, - PH_PERF_SEL_SC5_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x0000026f, - PH_PERF_SEL_SC6_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x000002e4, - PH_PERF_SEL_SC6_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x000002e5, - PH_PERF_SEL_SC6_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x000002e6, - PH_PERF_SEL_SC6_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x000002e7, - PH_PERF_SEL_SC7_GFX_PIPE0_TO_1_TRANSITION__REMBRANDT = 0x0000035c, - PH_PERF_SEL_SC7_GFX_PIPE1_TO_0_TRANSITION__REMBRANDT = 0x0000035d, - 
PH_PERF_SEL_SC7_GFX_PIPE_EOP_PRIM_PROVOKED_TRANSITION__REMBRANDT = 0x0000035e, - PH_PERF_SEL_SC7_GFX_PIPE_EVENT_PROVOKED_TRANSITION__REMBRANDT = 0x0000035f, -} PH_PERFCNT_SEL; - -constexpr unsigned int MaxPhPerfcntSelGfx101 = PH_PERF_SEL_SC7_PA7_DEALLOC_4_0_RD__GFX10; -constexpr unsigned int MaxPhPerfcntSelGfx103Derivative = PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxPhPerfcntSelGfx11 = PH_PERF_SC7_FIFO_STATUS_3__GFX11; -#endif - -typedef enum PipeConfig { - ADDR_SURF_P2 = 0x00000000, - ADDR_SURF_P2_RESERVED0 = 0x00000001, - ADDR_SURF_P2_RESERVED1 = 0x00000002, - ADDR_SURF_P2_RESERVED2 = 0x00000003, - ADDR_SURF_P4_8x16 = 0x00000004, - ADDR_SURF_P4_16x16 = 0x00000005, - ADDR_SURF_P4_16x32 = 0x00000006, - ADDR_SURF_P4_32x32 = 0x00000007, - ADDR_SURF_P8_16x16_8x16 = 0x00000008, - ADDR_SURF_P8_16x32_8x16 = 0x00000009, - ADDR_SURF_P8_32x32_8x16 = 0x0000000a, - ADDR_SURF_P8_16x32_16x16 = 0x0000000b, - ADDR_SURF_P8_32x32_16x16 = 0x0000000c, - ADDR_SURF_P8_32x32_16x32 = 0x0000000d, - ADDR_SURF_P8_32x64_32x32 = 0x0000000e, - ADDR_SURF_P8_RESERVED0 = 0x0000000f, - ADDR_SURF_P16_32x32_8x16 = 0x00000010, - ADDR_SURF_P16_32x32_16x16 = 0x00000011, - ADDR_SURF_P16__GFX10PLUS = 0x00000012, -} PipeConfig; - -typedef enum PixelPipeCounterId { - PIXEL_PIPE_OCCLUSION_COUNT_0 = 0x00000000, - PIXEL_PIPE_OCCLUSION_COUNT_1 = 0x00000001, - PIXEL_PIPE_OCCLUSION_COUNT_2 = 0x00000002, - PIXEL_PIPE_OCCLUSION_COUNT_3 = 0x00000003, - PIXEL_PIPE_SCREEN_MIN_EXTENTS_0__CORE = 0x00000004, - PIXEL_PIPE_SCREEN_MAX_EXTENTS_0__CORE = 0x00000005, - PIXEL_PIPE_SCREEN_MIN_EXTENTS_1__CORE = 0x00000006, - PIXEL_PIPE_SCREEN_MAX_EXTENTS_1__CORE = 0x00000007, -} PixelPipeCounterId; - -typedef enum PixelPipeStride { - PIXEL_PIPE_STRIDE_32_BITS = 0x00000000, - PIXEL_PIPE_STRIDE_64_BITS = 0x00000001, - PIXEL_PIPE_STRIDE_128_BITS = 0x00000002, - 
PIXEL_PIPE_STRIDE_256_BITS = 0x00000003, -} PixelPipeStride; - -typedef enum QuadExportFormat { - EXPORT_UNUSED = 0x00000000, - EXPORT_32_R = 0x00000001, - EXPORT_32_GR = 0x00000002, - EXPORT_32_AR = 0x00000003, - EXPORT_FP16_ABGR = 0x00000004, - EXPORT_UNSIGNED16_ABGR = 0x00000005, - EXPORT_SIGNED16_ABGR = 0x00000006, - EXPORT_32_ABGR = 0x00000007, - EXPORT_32BPP_8PIX = 0x00000008, - EXPORT_16_16_UNSIGNED_8PIX = 0x00000009, - EXPORT_16_16_SIGNED_8PIX = 0x0000000a, - EXPORT_16_16_FLOAT_8PIX = 0x0000000b, -} QuadExportFormat; - -typedef enum QuadExportFormatOld { - EXPORT_4P_32BPC_ABGR = 0x00000000, - EXPORT_4P_16BPC_ABGR = 0x00000001, - EXPORT_4P_32BPC_GR = 0x00000002, - EXPORT_4P_32BPC_AR = 0x00000003, - EXPORT_2P_32BPC_ABGR = 0x00000004, - EXPORT_8P_32BPC_R = 0x00000005, -} QuadExportFormatOld; - -typedef enum ReadPolicy { - CACHE_LRU_RD = 0x00000000, - CACHE_STREAM_RD = 0x00000001, - CACHE_NOA = 0x00000002, - RESERVED_RDPOLICY = 0x00000003, -} ReadPolicy; - -typedef enum RLC_PERFCOUNTER_SEL { - RLC_PERF_SEL_POWER_FEATURE_0 = 0x00000000, - RLC_PERF_SEL_POWER_FEATURE_1 = 0x00000001, - RLC_PERF_SEL_CP_INTERRUPT = 0x00000002, - RLC_PERF_SEL_GRBM_INTERRUPT = 0x00000003, - RLC_PERF_SEL_SPM_INTERRUPT = 0x00000004, - RLC_PERF_SEL_IH_INTERRUPT = 0x00000005, - RLC_PERF_SEL_SERDES_COMMAND_WRITE = 0x00000006, -} RLC_PERFCOUNTER_SEL; - -constexpr unsigned int MaxRlcPerfcounterSel = RLC_PERF_SEL_SERDES_COMMAND_WRITE; - -typedef enum RLC_PERFMON_STATE { - RLC_PERFMON_STATE_RESET = 0x00000000, - RLC_PERFMON_STATE_ENABLE = 0x00000001, - RLC_PERFMON_STATE_DISABLE = 0x00000002, - RLC_PERFMON_STATE_RESERVED_3 = 0x00000003, - RLC_PERFMON_STATE_RESERVED_4 = 0x00000004, - RLC_PERFMON_STATE_RESERVED_5 = 0x00000005, - RLC_PERFMON_STATE_RESERVED_6 = 0x00000006, - RLC_PERFMON_STATE_ROLLOVER = 0x00000007, -} RLC_PERFMON_STATE; - -typedef enum RMIPerfSel { - RMI_PERF_SEL_NONE__CORE = 0x00000000, - RMI_PERF_SEL_BUSY__CORE = 0x00000001, - RMI_PERF_SEL_REG_CLK_VLD__CORE = 0x00000002, - 
RMI_PERF_SEL_DYN_CLK_CMN_VLD__CORE = 0x00000003, - RMI_PERF_SEL_DYN_CLK_RB_VLD__CORE = 0x00000004, - RMI_PERF_SEL_DYN_CLK_PERF_VLD__CORE = 0x00000005, - RMI_PERF_SEL_PERF_WINDOW__CORE = 0x00000006, - RMI_PERF_SEL_EVENT_SEND__CORE = 0x00000007, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID0__GFX09 = 0x00000008, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID1__GFX09 = 0x00000009, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID2__GFX09 = 0x0000000a, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID3__GFX09 = 0x0000000b, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID4__GFX09 = 0x0000000c, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID5__GFX09 = 0x0000000d, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID6__GFX09 = 0x0000000e, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID7__GFX09 = 0x0000000f, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID8__GFX09 = 0x00000010, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID9__GFX09 = 0x00000011, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID10__GFX09 = 0x00000012, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID11__GFX09 = 0x00000013, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID12__GFX09 = 0x00000014, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID13__GFX09 = 0x00000015, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID14__GFX09 = 0x00000016, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID15__GFX09 = 0x00000017, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID_ALL__GFX09 = 0x00000018, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID0__GFX09 = 0x00000019, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID1__GFX09 = 0x0000001a, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID2__GFX09 = 0x0000001b, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID3__GFX09 = 0x0000001c, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID4__GFX09 = 0x0000001d, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID5__GFX09 = 0x0000001e, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID6__GFX09 = 0x0000001f, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID7__GFX09 = 0x00000020, 
- RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID8__GFX09 = 0x00000021, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID9__GFX09 = 0x00000022, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID10__GFX09 = 0x00000023, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID11__GFX09 = 0x00000024, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID12__GFX09 = 0x00000025, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID13__GFX09 = 0x00000026, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID14__GFX09 = 0x00000027, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID15__GFX09 = 0x00000028, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID_ALL__GFX09 = 0x00000029, - RMI_PERF_SEL_UTCL1_TRANSLATION_MISS__GFX09 = 0x0000002a, - RMI_PERF_SEL_UTCL1_PERMISSION_MISS__GFX09 = 0x0000002b, - RMI_PERF_SEL_UTCL1_TRANSLATION_HIT__GFX09 = 0x0000002c, - RMI_PERF_SEL_UTCL1_REQUEST__GFX09 = 0x0000002d, - RMI_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX__GFX09 = 0x0000002e, - RMI_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT__GFX09 = 0x0000002f, - RMI_PERF_SEL_UTCL1_LFIFO_FULL__GFX09 = 0x00000030, - RMI_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES__GFX09 = 0x00000031, - RMI_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX09 = 0x00000032, - RMI_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL__GFX09 = 0x00000033, - RMI_PERF_SEL_UTCL1_HIT_FIFO_FULL__GFX09 = 0x00000034, - RMI_PERF_SEL_UTCL1_STALL_MULTI_MISS__GFX09 = 0x00000035, - RMI_PERF_SEL_RB_RMI_WRREQ_ALL_CID__GFX09 = 0x00000036, - RMI_PERF_SEL_RB_RMI_WRREQ_TO_WRRET_BUSY__GFX09 = 0x00000037, - RMI_PERF_SEL_RB_RMI_WRREQ_CID0__GFX09 = 0x00000038, - RMI_PERF_SEL_RB_RMI_WRREQ_CID1__GFX09 = 0x00000039, - RMI_PERF_SEL_RB_RMI_WRREQ_CID2__GFX09 = 0x0000003a, - RMI_PERF_SEL_RB_RMI_WRREQ_CID3__GFX09 = 0x0000003b, - RMI_PERF_SEL_RB_RMI_WRREQ_CID4__GFX09 = 0x0000003c, - RMI_PERF_SEL_RB_RMI_WRREQ_CID5__GFX09 = 0x0000003d, - RMI_PERF_SEL_RB_RMI_WRREQ_CID6__GFX09 = 0x0000003e, - RMI_PERF_SEL_RB_RMI_WRREQ_CID7__GFX09 = 0x0000003f, - 
RMI_PERF_SEL_RB_RMI_32BWRREQ_INFLIGHT_ALL_ORONE_CID__GFX09 = 0x00000040, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_LENGTH_ALL_ORONE_CID__GFX09 = 0x00000041, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_ALL_ORONE_CID__GFX09 = 0x00000042, - RMI_PERF_SEL_RB_RMI_WRREQ_RESIDENCY__GFX09 = 0x00000043, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_ALL_CID__GFX09 = 0x00000044, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID0__GFX09 = 0x00000045, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID1__GFX09 = 0x00000046, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID2__GFX09 = 0x00000047, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID3__GFX09 = 0x00000048, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID4__GFX09 = 0x00000049, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID5__GFX09 = 0x0000004a, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID6__GFX09 = 0x0000004b, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID7__GFX09 = 0x0000004c, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK0__GFX09 = 0x0000004d, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK1__GFX09 = 0x0000004e, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK2__GFX09 = 0x0000004f, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK3__GFX09 = 0x00000050, - RMI_PERF_SEL_RB_RMI_32BRDREQ_ALL_CID__GFX09 = 0x00000051, - RMI_PERF_SEL_RB_RMI_RDREQ_ALL_CID__GFX09 = 0x00000052, - RMI_PERF_SEL_RB_RMI_RDREQ_TO_RDRET_BUSY__GFX09 = 0x00000053, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID0__GFX09 = 0x00000054, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID1__GFX09 = 0x00000055, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID2__GFX09 = 0x00000056, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID3__GFX09 = 0x00000057, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID4__GFX09 = 0x00000058, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID5__GFX09 = 0x00000059, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID6__GFX09 = 0x0000005a, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID7__GFX09 = 0x0000005b, - RMI_PERF_SEL_RB_RMI_RDREQ_CID0__GFX09 = 0x0000005c, - RMI_PERF_SEL_RB_RMI_RDREQ_CID1__GFX09 = 0x0000005d, - RMI_PERF_SEL_RB_RMI_RDREQ_CID2__GFX09 = 0x0000005e, - RMI_PERF_SEL_RB_RMI_RDREQ_CID3__GFX09 = 0x0000005f, - RMI_PERF_SEL_RB_RMI_RDREQ_CID4__GFX09 = 0x00000060, - 
RMI_PERF_SEL_RB_RMI_RDREQ_CID5__GFX09 = 0x00000061, - RMI_PERF_SEL_RB_RMI_RDREQ_CID6__GFX09 = 0x00000062, - RMI_PERF_SEL_RB_RMI_RDREQ_CID7__GFX09 = 0x00000063, - RMI_PERF_SEL_RB_RMI_32BRDREQ_INFLIGHT_ALL_ORONE_CID__GFX09 = 0x00000064, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_LENGTH_ALL_ORONE_CID__GFX09 = 0x00000065, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_ALL_ORONE_CID__GFX09 = 0x00000066, - RMI_PERF_SEL_RB_RMI_RDREQ_RESIDENCY__GFX09 = 0x00000067, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_ALL_CID__GFX09 = 0x00000068, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID0__GFX09 = 0x00000069, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID1__GFX09 = 0x0000006a, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID2__GFX09 = 0x0000006b, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID3__GFX09 = 0x0000006c, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID4__GFX09 = 0x0000006d, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID5__GFX09 = 0x0000006e, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID6__GFX09 = 0x0000006f, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID7__GFX09 = 0x00000070, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK0__GFX09 = 0x00000071, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK1__GFX09 = 0x00000072, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK2__GFX09 = 0x00000073, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK3__GFX09 = 0x00000074, - RMI_PERF_SEL_RB_RMI_WR_FIFO_MAX__GFX09 = 0x00000075, - RMI_PERF_SEL_RB_RMI_WR_FIFO_EMPTY__GFX09 = 0x00000076, - RMI_PERF_SEL_RB_RMI_WR_IDLE__GFX09 = 0x00000077, - RMI_PERF_SEL_RB_RMI_WR_STARVE__GFX09 = 0x00000078, - RMI_PERF_SEL_RB_RMI_WR_STALL__GFX09 = 0x00000079, - RMI_PERF_SEL_RB_RMI_WR_BUSY__GFX09 = 0x0000007a, - RMI_PERF_SEL_RB_RMI_WR_INTF_BUSY__GFX09 = 0x0000007b, - RMI_PERF_SEL_RB_RMI_RD_FIFO_MAX__GFX09 = 0x0000007c, - RMI_PERF_SEL_RB_RMI_RD_FIFO_EMPTY__GFX09 = 0x0000007d, - RMI_PERF_SEL_RB_RMI_RD_IDLE__GFX09 = 0x0000007e, - RMI_PERF_SEL_RB_RMI_RD_STARVE__GFX09 = 0x0000007f, - RMI_PERF_SEL_RB_RMI_RD_STALL__GFX09 = 0x00000080, - RMI_PERF_SEL_RB_RMI_RD_BUSY__GFX09 = 0x00000081, - 
RMI_PERF_SEL_RB_RMI_RD_INTF_BUSY__GFX09 = 0x00000082, - RMI_PERF_SEL_RMI_TC_64BWRREQ_ALL_ORONE_CID__GFX09 = 0x00000083, - RMI_PERF_SEL_RMI_TC_64BRDREQ_ALL_ORONE_CID__GFX09 = 0x00000084, - RMI_PERF_SEL_RMI_TC_WRREQ_ALL_CID__GFX09 = 0x00000085, - RMI_PERF_SEL_RMI_TC_REQ_BUSY__GFX09 = 0x00000086, - RMI_PERF_SEL_RMI_TC_WRREQ_CID0__GFX09 = 0x00000087, - RMI_PERF_SEL_RMI_TC_WRREQ_CID1__GFX09 = 0x00000088, - RMI_PERF_SEL_RMI_TC_WRREQ_CID2__GFX09 = 0x00000089, - RMI_PERF_SEL_RMI_TC_WRREQ_CID3__GFX09 = 0x0000008a, - RMI_PERF_SEL_RMI_TC_WRREQ_CID4__GFX09 = 0x0000008b, - RMI_PERF_SEL_RMI_TC_WRREQ_CID5__GFX09 = 0x0000008c, - RMI_PERF_SEL_RMI_TC_WRREQ_CID6__GFX09 = 0x0000008d, - RMI_PERF_SEL_RMI_TC_WRREQ_CID7__GFX09 = 0x0000008e, - RMI_PERF_SEL_RMI_TC_WRREQ_INFLIGHT_ALL_CID__GFX09 = 0x0000008f, - RMI_PERF_SEL_TC_RMI_WRRET_VALID_ALL_CID__GFX09 = 0x00000090, - RMI_PERF_SEL_RMI_TC_RDREQ_ALL_CID__GFX09 = 0x00000091, - RMI_PERF_SEL_RMI_TC_RDREQ_CID0__GFX09 = 0x00000092, - RMI_PERF_SEL_RMI_TC_RDREQ_CID1__GFX09 = 0x00000093, - RMI_PERF_SEL_RMI_TC_RDREQ_CID2__GFX09 = 0x00000094, - RMI_PERF_SEL_RMI_TC_RDREQ_CID3__GFX09 = 0x00000095, - RMI_PERF_SEL_RMI_TC_RDREQ_CID4__GFX09 = 0x00000096, - RMI_PERF_SEL_RMI_TC_RDREQ_CID5__GFX09 = 0x00000097, - RMI_PERF_SEL_RMI_TC_RDREQ_CID6__GFX09 = 0x00000098, - RMI_PERF_SEL_RMI_TC_RDREQ_CID7__GFX09 = 0x00000099, - RMI_PERF_SEL_RMI_TC_STALL_RDREQ__GFX09 = 0x0000009a, - RMI_PERF_SEL_RMI_TC_STALL_WRREQ__GFX09 = 0x0000009b, - RMI_PERF_SEL_RMI_TC_STALL_ALLREQ__GFX09 = 0x0000009c, - RMI_PERF_SEL_RMI_TC_CREDIT_FULL_NO_PENDING_SEND__GFX09 = 0x0000009d, - RMI_PERF_SEL_RMI_TC_CREDIT_ZERO_PENDING_SEND__GFX09 = 0x0000009e, - RMI_PERF_SEL_RMI_TC_RDREQ_INFLIGHT_ALL_CID__GFX09 = 0x0000009f, - RMI_PERF_SEL_TC_RMI_RDRET_VALID_ALL_CID__GFX09 = 0x000000a0, - RMI_PERF_SEL_UTCL1_BUSY__GFX09 = 0x000000a1, - RMI_PERF_SEL_RMI_UTC_REQ__GFX09 = 0x000000a2, - RMI_PERF_SEL_RMI_UTC_BUSY__GFX09 = 0x000000a3, - RMI_PERF_SEL_UTCL1_UTCL2_REQ__GFX09 = 0x000000a4, - 
RMI_PERF_SEL_LEVEL_ADD_UTCL1_TO_UTCL2__GFX09 = 0x000000a5, - RMI_PERF_SEL_PROBE_UTCL1_XNACK_RETRY__GFX09 = 0x000000a6, - RMI_PERF_SEL_PROBE_UTCL1_ALL_FAULT__GFX09 = 0x000000a7, - RMI_PERF_SEL_PROBE_UTCL1_PRT_FAULT__GFX09 = 0x000000a8, - RMI_PERF_SEL_PROBE_UTCL1_VMID_BYPASS__GFX09 = 0x000000a9, - RMI_PERF_SEL_PROBE_UTCL1_XNACK_NORETRY_FAULT__GFX09 = 0x000000aa, - RMI_PERF_SEL_XNACK_FIFO_NUM_USED__GFX09 = 0x000000ab, - RMI_PERF_SEL_LAT_FIFO_NUM_USED__GFX09 = 0x000000ac, - RMI_PERF_SEL_LAT_FIFO_BLOCKING_REQ__GFX09 = 0x000000ad, - RMI_PERF_SEL_LAT_FIFO_NONBLOCKING_REQ__GFX09 = 0x000000ae, - RMI_PERF_SEL_XNACK_FIFO_FULL__GFX09 = 0x000000af, - RMI_PERF_SEL_XNACK_FIFO_BUSY__GFX09 = 0x000000b0, - RMI_PERF_SEL_LAT_FIFO_FULL__GFX09 = 0x000000b1, - RMI_PERF_SEL_SKID_FIFO_DEPTH__GFX09 = 0x000000b2, - RMI_PERF_SEL_TCIW_INFLIGHT_COUNT__GFX09 = 0x000000b3, - RMI_PERF_SEL_PRT_FIFO_NUM_USED__GFX09 = 0x000000b4, - RMI_PERF_SEL_PRT_FIFO_REQ__GFX09 = 0x000000b5, - RMI_PERF_SEL_PRT_FIFO_BUSY__GFX09 = 0x000000b6, - RMI_PERF_SEL_TCIW_REQ__GFX09 = 0x000000b7, - RMI_PERF_SEL_TCIW_BUSY__GFX09 = 0x000000b8, - RMI_PERF_SEL_SKID_FIFO_REQ__GFX09 = 0x000000b9, - RMI_PERF_SEL_SKID_FIFO_BUSY__GFX09 = 0x000000ba, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK0__GFX09 = 0x000000bb, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK1__GFX09 = 0x000000bc, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK2__GFX09 = 0x000000bd, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK3__GFX09 = 0x000000be, - RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTR__GFX09 = 0x000000bf, - RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTR__GFX09 = 0x000000c0, - RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTRB__GFX09 = 0x000000c1, - RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTRB__GFX09 = 0x000000c2, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTR__GFX09 = 0x000000c3, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTR__GFX09 = 0x000000c4, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTRB__GFX09 = 0x000000c5, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTRB__GFX09 = 0x000000c6, - 
RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTR__GFX09 = 0x000000c7, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTR__GFX09 = 0x000000c8, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTRB__GFX09 = 0x000000c9, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTRB__GFX09 = 0x000000ca, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTR__GFX09 = 0x000000cb, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTR__GFX09 = 0x000000cc, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTRB__GFX09 = 0x000000cd, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTRB__GFX09 = 0x000000ce, - RMI_PERF_SEL_POP_DEMUX_RTS_RTR__GFX09 = 0x000000cf, - RMI_PERF_SEL_POP_DEMUX_RTSB_RTR__GFX09 = 0x000000d0, - RMI_PERF_SEL_POP_DEMUX_RTS_RTRB__GFX09 = 0x000000d1, - RMI_PERF_SEL_POP_DEMUX_RTSB_RTRB__GFX09 = 0x000000d2, - RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTR__GFX09 = 0x000000d3, - RMI_PERF_SEL_LEVEL_ADD_RMI_TO_UTC__GFX09 = 0x000000d4, - RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTR__GFX09 = 0x000000d5, - RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTRB__GFX09 = 0x000000d6, - RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTRB__GFX09 = 0x000000d7, - RMI_PERF_SEL_UTC_POP_RTS_RTR__GFX09 = 0x000000d8, - RMI_PERF_SEL_UTC_POP_RTSB_RTR__GFX09 = 0x000000d9, - RMI_PERF_SEL_UTC_POP_RTS_RTRB__GFX09 = 0x000000da, - RMI_PERF_SEL_UTC_POP_RTSB_RTRB__GFX09 = 0x000000db, - RMI_PERF_SEL_POP_XNACK_RTS_RTR__GFX09 = 0x000000dc, - RMI_PERF_SEL_POP_XNACK_RTSB_RTR__GFX09 = 0x000000dd, - RMI_PERF_SEL_POP_XNACK_RTS_RTRB__GFX09 = 0x000000de, - RMI_PERF_SEL_POP_XNACK_RTSB_RTRB__GFX09 = 0x000000df, - RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTR__GFX09 = 0x000000e0, - RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTR__GFX09 = 0x000000e1, - RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTRB__GFX09 = 0x000000e2, - RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTRB__GFX09 = 0x000000e3, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTR__GFX09 = 0x000000e4, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTR__GFX09 = 0x000000e5, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTRB__GFX09 = 0x000000e6, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTRB__GFX09 = 
0x000000e7, - RMI_PERF_SEL_SKID_FIFO_IN_RTS__GFX09 = 0x000000e8, - RMI_PERF_SEL_SKID_FIFO_IN_RTSB__GFX09 = 0x000000e9, - RMI_PERF_SEL_SKID_FIFO_OUT_RTS__GFX09 = 0x000000ea, - RMI_PERF_SEL_SKID_FIFO_OUT_RTSB__GFX09 = 0x000000eb, - RMI_PERF_SEL_XBAR_PROBEGEN_READ_RTS_RTR__GFX09 = 0x000000ec, - RMI_PERF_SEL_XBAR_PROBEGEN_WRITE_RTS_RTR__GFX09 = 0x000000ed, - RMI_PERF_SEL_XBAR_PROBEGEN_IN0_RTS_RTR__GFX09 = 0x000000ee, - RMI_PERF_SEL_XBAR_PROBEGEN_IN1_RTS_RTR__GFX09 = 0x000000ef, - RMI_PERF_SEL_XBAR_PROBEGEN_CB_RTS_RTR__GFX09 = 0x000000f0, - RMI_PERF_SEL_XBAR_PROBEGEN_DB_RTS_RTR__GFX09 = 0x000000f1, - RMI_PERF_SEL_REORDER_FIFO_REQ__GFX09 = 0x000000f2, - RMI_PERF_SEL_REORDER_FIFO_BUSY__GFX09 = 0x000000f3, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_ALL_CID__GFX09 = 0x000000f4, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID0__GFX09 = 0x000000f5, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID1__GFX09 = 0x000000f6, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID2__GFX09 = 0x000000f7, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3__GFX09 = 0x000000f8, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID4__GFX09 = 0x000000f9, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID5__GFX09 = 0x000000fa, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID6__GFX09 = 0x000000fb, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID7__GFX09 = 0x000000fc, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK0__GFX09 = 0x000000fd, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK1__GFX09 = 0x000000fe, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK2__GFX09 = 0x000000ff, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09 = 0x00000100, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID0__GFX10CORE = 0x00000008, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID1__GFX10CORE = 0x00000009, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID2__GFX10CORE = 0x0000000a, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID3__GFX10CORE = 0x0000000b, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID4__GFX10CORE = 0x0000000c, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID5__GFX10CORE = 0x0000000d, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID6__GFX10CORE = 0x0000000e, - 
RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID7__GFX10CORE = 0x0000000f, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID8__GFX10CORE = 0x00000010, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID9__GFX10CORE = 0x00000011, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID10__GFX10CORE = 0x00000012, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID11__GFX10CORE = 0x00000013, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID12__GFX10CORE = 0x00000014, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID13__GFX10CORE = 0x00000015, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID14__GFX10CORE = 0x00000016, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID15__GFX10CORE = 0x00000017, - RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID_ALL__GFX10CORE = 0x00000018, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID0__GFX10CORE = 0x00000019, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID1__GFX10CORE = 0x0000001a, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID2__GFX10CORE = 0x0000001b, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID3__GFX10CORE = 0x0000001c, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID4__GFX10CORE = 0x0000001d, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID5__GFX10CORE = 0x0000001e, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID6__GFX10CORE = 0x0000001f, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID7__GFX10CORE = 0x00000020, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID8__GFX10CORE = 0x00000021, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID9__GFX10CORE = 0x00000022, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID10__GFX10CORE = 0x00000023, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID11__GFX10CORE = 0x00000024, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID12__GFX10CORE = 0x00000025, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID13__GFX10CORE = 0x00000026, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID14__GFX10CORE = 0x00000027, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID15__GFX10CORE = 
0x00000028, - RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID_ALL__GFX10CORE = 0x00000029, - RMI_PERF_SEL_UTCL1_TRANSLATION_MISS__GFX10CORE = 0x0000002a, - RMI_PERF_SEL_UTCL1_PERMISSION_MISS__GFX10CORE = 0x0000002b, - RMI_PERF_SEL_UTCL1_TRANSLATION_HIT__GFX10CORE = 0x0000002c, - RMI_PERF_SEL_UTCL1_REQUEST__GFX10CORE = 0x0000002d, - RMI_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX__GFX10CORE = 0x0000002e, - RMI_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT__GFX10CORE = 0x0000002f, - RMI_PERF_SEL_UTCL1_LFIFO_FULL__GFX10CORE = 0x00000030, - RMI_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES__GFX10CORE = 0x00000031, - RMI_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX10CORE = 0x00000032, - RMI_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL__GFX10CORE = 0x00000033, - RMI_PERF_SEL_UTCL1_HIT_FIFO_FULL__GFX10CORE = 0x00000034, - RMI_PERF_SEL_UTCL1_STALL_MULTI_MISS__GFX10CORE = 0x00000035, - RMI_PERF_SEL_RB_RMI_WRREQ_ALL_CID__GFX10CORE = 0x00000036, - RMI_PERF_SEL_RB_RMI_WRREQ_TO_WRRET_BUSY__GFX10CORE = 0x00000037, - RMI_PERF_SEL_RB_RMI_WRREQ_CID0__GFX10CORE = 0x00000038, - RMI_PERF_SEL_RB_RMI_WRREQ_CID1__GFX10CORE = 0x00000039, - RMI_PERF_SEL_RB_RMI_WRREQ_CID2__GFX10CORE = 0x0000003a, - RMI_PERF_SEL_RB_RMI_WRREQ_CID3__GFX10CORE = 0x0000003b, - RMI_PERF_SEL_RB_RMI_WRREQ_CID4__GFX10CORE = 0x0000003c, - RMI_PERF_SEL_RB_RMI_WRREQ_CID5__GFX10CORE = 0x0000003d, - RMI_PERF_SEL_RB_RMI_WRREQ_CID6__GFX10CORE = 0x0000003e, - RMI_PERF_SEL_RB_RMI_WRREQ_CID7__GFX10CORE = 0x0000003f, - RMI_PERF_SEL_RB_RMI_32BWRREQ_INFLIGHT_ALL_ORONE_CID__GFX10CORE = 0x00000040, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_LENGTH_ALL_ORONE_CID__GFX10CORE = 0x00000041, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_ALL_ORONE_CID__GFX10CORE = 0x00000042, - RMI_PERF_SEL_RB_RMI_WRREQ_RESIDENCY__GFX10CORE = 0x00000043, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_ALL_CID__GFX10CORE = 0x00000044, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID0__GFX10CORE = 0x00000045, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID1__GFX10CORE = 0x00000046, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID2__GFX10CORE = 
0x00000047, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID3__GFX10CORE = 0x00000048, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID4__GFX10CORE = 0x00000049, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID5__GFX10CORE = 0x0000004a, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID6__GFX10CORE = 0x0000004b, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID7__GFX10CORE = 0x0000004c, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK0__GFX10CORE = 0x0000004d, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK1__GFX10CORE = 0x0000004e, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK2__GFX10CORE = 0x0000004f, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK3__GFX10CORE = 0x00000050, - RMI_PERF_SEL_RB_RMI_32BRDREQ_ALL_CID__GFX10CORE = 0x00000051, - RMI_PERF_SEL_RB_RMI_RDREQ_ALL_CID__GFX10CORE = 0x00000052, - RMI_PERF_SEL_RB_RMI_RDREQ_TO_RDRET_BUSY__GFX10CORE = 0x00000053, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID0__GFX10CORE = 0x00000054, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID1__GFX10CORE = 0x00000055, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID2__GFX10CORE = 0x00000056, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID3__GFX10CORE = 0x00000057, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID4__GFX10CORE = 0x00000058, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID5__GFX10CORE = 0x00000059, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID6__GFX10CORE = 0x0000005a, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID7__GFX10CORE = 0x0000005b, - RMI_PERF_SEL_RB_RMI_RDREQ_CID0__GFX10CORE = 0x0000005c, - RMI_PERF_SEL_RB_RMI_RDREQ_CID1__GFX10CORE = 0x0000005d, - RMI_PERF_SEL_RB_RMI_RDREQ_CID2__GFX10CORE = 0x0000005e, - RMI_PERF_SEL_RB_RMI_RDREQ_CID3__GFX10CORE = 0x0000005f, - RMI_PERF_SEL_RB_RMI_RDREQ_CID4__GFX10CORE = 0x00000060, - RMI_PERF_SEL_RB_RMI_RDREQ_CID5__GFX10CORE = 0x00000061, - RMI_PERF_SEL_RB_RMI_RDREQ_CID6__GFX10CORE = 0x00000062, - RMI_PERF_SEL_RB_RMI_RDREQ_CID7__GFX10CORE = 0x00000063, - RMI_PERF_SEL_RB_RMI_32BRDREQ_INFLIGHT_ALL_ORONE_CID__GFX10CORE = 0x00000064, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_LENGTH_ALL_ORONE_CID__GFX10CORE = 0x00000065, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_ALL_ORONE_CID__GFX10CORE = 0x00000066, - 
RMI_PERF_SEL_RB_RMI_RDREQ_RESIDENCY__GFX10CORE = 0x00000067, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_ALL_CID__GFX10CORE = 0x00000068, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID0__GFX10CORE = 0x00000069, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID1__GFX10CORE = 0x0000006a, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID2__GFX10CORE = 0x0000006b, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID3__GFX10CORE = 0x0000006c, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID4__GFX10CORE = 0x0000006d, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID5__GFX10CORE = 0x0000006e, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID6__GFX10CORE = 0x0000006f, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID7__GFX10CORE = 0x00000070, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK0__GFX10CORE = 0x00000071, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK1__GFX10CORE = 0x00000072, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK2__GFX10CORE = 0x00000073, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK3__GFX10CORE = 0x00000074, - RMI_PERF_SEL_RB_RMI_WR_FIFO_MAX__GFX10CORE = 0x00000075, - RMI_PERF_SEL_RB_RMI_WR_FIFO_EMPTY__GFX10CORE = 0x00000076, - RMI_PERF_SEL_RB_RMI_WR_IDLE__GFX10CORE = 0x00000077, - RMI_PERF_SEL_RB_RMI_WR_STARVE__GFX10CORE = 0x00000078, - RMI_PERF_SEL_RB_RMI_WR_STALL__GFX10CORE = 0x00000079, - RMI_PERF_SEL_RB_RMI_WR_BUSY__GFX10CORE = 0x0000007a, - RMI_PERF_SEL_RB_RMI_WR_INTF_BUSY__GFX10CORE = 0x0000007b, - RMI_PERF_SEL_RB_RMI_RD_FIFO_MAX__GFX10CORE = 0x0000007c, - RMI_PERF_SEL_RB_RMI_RD_FIFO_EMPTY__GFX10CORE = 0x0000007d, - RMI_PERF_SEL_RB_RMI_RD_IDLE__GFX10CORE = 0x0000007e, - RMI_PERF_SEL_RB_RMI_RD_STARVE__GFX10CORE = 0x0000007f, - RMI_PERF_SEL_RB_RMI_RD_STALL__GFX10CORE = 0x00000080, - RMI_PERF_SEL_RB_RMI_RD_BUSY__GFX10CORE = 0x00000081, - RMI_PERF_SEL_RB_RMI_RD_INTF_BUSY__GFX10CORE = 0x00000082, - RMI_PERF_SEL_RMI_TC_64BWRREQ_ALL_ORONE_CID__GFX10CORE = 0x00000083, - RMI_PERF_SEL_RMI_TC_64BRDREQ_ALL_ORONE_CID__GFX10CORE = 0x00000084, - RMI_PERF_SEL_RMI_TC_WRREQ_ALL_CID__GFX10CORE = 0x00000085, - RMI_PERF_SEL_RMI_TC_REQ_BUSY__GFX10CORE = 
0x00000086, - RMI_PERF_SEL_RMI_TC_WRREQ_CID0__GFX10CORE = 0x00000087, - RMI_PERF_SEL_RMI_TC_WRREQ_CID1__GFX10CORE = 0x00000088, - RMI_PERF_SEL_RMI_TC_WRREQ_CID2__GFX10CORE = 0x00000089, - RMI_PERF_SEL_RMI_TC_WRREQ_CID3__GFX10CORE = 0x0000008a, - RMI_PERF_SEL_RMI_TC_WRREQ_CID4__GFX10CORE = 0x0000008b, - RMI_PERF_SEL_RMI_TC_WRREQ_CID5__GFX10CORE = 0x0000008c, - RMI_PERF_SEL_RMI_TC_WRREQ_CID6__GFX10CORE = 0x0000008d, - RMI_PERF_SEL_RMI_TC_WRREQ_CID7__GFX10CORE = 0x0000008e, - RMI_PERF_SEL_RMI_TC_WRREQ_INFLIGHT_ALL_CID__GFX10CORE = 0x0000008f, - RMI_PERF_SEL_TC_RMI_WRRET_VALID_ALL_CID__GFX10CORE = 0x00000090, - RMI_PERF_SEL_RMI_TC_RDREQ_ALL_CID__GFX10CORE = 0x00000091, - RMI_PERF_SEL_RMI_TC_RDREQ_CID0__GFX10CORE = 0x00000092, - RMI_PERF_SEL_RMI_TC_RDREQ_CID1__GFX10CORE = 0x00000093, - RMI_PERF_SEL_RMI_TC_RDREQ_CID2__GFX10CORE = 0x00000094, - RMI_PERF_SEL_RMI_TC_RDREQ_CID3__GFX10CORE = 0x00000095, - RMI_PERF_SEL_RMI_TC_RDREQ_CID4__GFX10CORE = 0x00000096, - RMI_PERF_SEL_RMI_TC_RDREQ_CID5__GFX10CORE = 0x00000097, - RMI_PERF_SEL_RMI_TC_RDREQ_CID6__GFX10CORE = 0x00000098, - RMI_PERF_SEL_RMI_TC_RDREQ_CID7__GFX10CORE = 0x00000099, - RMI_PERF_SEL_RMI_TC_STALL_RDREQ__GFX10CORE = 0x0000009a, - RMI_PERF_SEL_RMI_TC_STALL_WRREQ__GFX10CORE = 0x0000009b, - RMI_PERF_SEL_RMI_TC_STALL_ALLREQ__GFX10CORE = 0x0000009c, - RMI_PERF_SEL_RMI_TC_CREDIT_FULL_NO_PENDING_SEND__GFX10CORE = 0x0000009d, - RMI_PERF_SEL_RMI_TC_CREDIT_ZERO_PENDING_SEND__GFX10CORE = 0x0000009e, - RMI_PERF_SEL_RMI_TC_RDREQ_INFLIGHT_ALL_CID__GFX10CORE = 0x0000009f, - RMI_PERF_SEL_TC_RMI_RDRET_VALID_ALL_CID__GFX10CORE = 0x000000a0, - RMI_PERF_SEL_UTCL1_BUSY__GFX10CORE = 0x000000a1, - RMI_PERF_SEL_RMI_UTC_REQ__GFX10CORE = 0x000000a2, - RMI_PERF_SEL_RMI_UTC_BUSY__GFX10CORE = 0x000000a3, - RMI_PERF_SEL_UTCL1_UTCL2_REQ__GFX10CORE = 0x000000a4, - RMI_PERF_SEL_LEVEL_ADD_UTCL1_TO_UTCL2__GFX10CORE = 0x000000a5, - RMI_PERF_SEL_PROBE_UTCL1_XNACK_RETRY__GFX10CORE = 0x000000a6, - RMI_PERF_SEL_PROBE_UTCL1_ALL_FAULT__GFX10CORE = 
0x000000a7, - RMI_PERF_SEL_PROBE_UTCL1_PRT_FAULT__GFX10CORE = 0x000000a8, - RMI_PERF_SEL_PROBE_UTCL1_VMID_BYPASS__GFX10CORE = 0x000000a9, - RMI_PERF_SEL_PROBE_UTCL1_XNACK_NORETRY_FAULT__GFX10CORE = 0x000000aa, - RMI_PERF_SEL_XNACK_FIFO_NUM_USED__GFX10CORE = 0x000000ab, - RMI_PERF_SEL_LAT_FIFO_NUM_USED__GFX10CORE = 0x000000ac, - RMI_PERF_SEL_LAT_FIFO_BLOCKING_REQ__GFX10CORE = 0x000000ad, - RMI_PERF_SEL_LAT_FIFO_NONBLOCKING_REQ__GFX10CORE = 0x000000ae, - RMI_PERF_SEL_XNACK_FIFO_FULL__GFX10CORE = 0x000000af, - RMI_PERF_SEL_XNACK_FIFO_BUSY__GFX10CORE = 0x000000b0, - RMI_PERF_SEL_LAT_FIFO_FULL__GFX10CORE = 0x000000b1, - RMI_PERF_SEL_SKID_FIFO_DEPTH__GFX10CORE = 0x000000b2, - RMI_PERF_SEL_TCIW_INFLIGHT_COUNT__GFX10CORE = 0x000000b3, - RMI_PERF_SEL_PRT_FIFO_NUM_USED__GFX10CORE = 0x000000b4, - RMI_PERF_SEL_PRT_FIFO_REQ__GFX10CORE = 0x000000b5, - RMI_PERF_SEL_PRT_FIFO_BUSY__GFX10CORE = 0x000000b6, - RMI_PERF_SEL_TCIW_REQ__GFX10CORE = 0x000000b7, - RMI_PERF_SEL_TCIW_BUSY__GFX10CORE = 0x000000b8, - RMI_PERF_SEL_SKID_FIFO_REQ__GFX10CORE = 0x000000b9, - RMI_PERF_SEL_SKID_FIFO_BUSY__GFX10CORE = 0x000000ba, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK0__GFX10CORE = 0x000000bb, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK1__GFX10CORE = 0x000000bc, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK2__GFX10CORE = 0x000000bd, - RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK3__GFX10CORE = 0x000000be, - RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTR__GFX10CORE = 0x000000bf, - RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTR__GFX10CORE = 0x000000c0, - RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTRB__GFX10CORE = 0x000000c1, - RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTRB__GFX10CORE = 0x000000c2, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTR__GFX10CORE = 0x000000c3, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTR__GFX10CORE = 0x000000c4, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTRB__GFX10CORE = 0x000000c5, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTRB__GFX10CORE = 0x000000c6, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTR__GFX10CORE = 0x000000c7, - 
RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTR__GFX10CORE = 0x000000c8, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTRB__GFX10CORE = 0x000000c9, - RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTRB__GFX10CORE = 0x000000ca, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTR__GFX10CORE = 0x000000cb, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTR__GFX10CORE = 0x000000cc, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTRB__GFX10CORE = 0x000000cd, - RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTRB__GFX10CORE = 0x000000ce, - RMI_PERF_SEL_POP_DEMUX_RTS_RTR__GFX10CORE = 0x000000cf, - RMI_PERF_SEL_POP_DEMUX_RTSB_RTR__GFX10CORE = 0x000000d0, - RMI_PERF_SEL_POP_DEMUX_RTS_RTRB__GFX10CORE = 0x000000d1, - RMI_PERF_SEL_POP_DEMUX_RTSB_RTRB__GFX10CORE = 0x000000d2, - RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTR__GFX10CORE = 0x000000d3, - RMI_PERF_SEL_LEVEL_ADD_RMI_TO_UTC__GFX10CORE = 0x000000d4, - RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTR__GFX10CORE = 0x000000d5, - RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTRB__GFX10CORE = 0x000000d6, - RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTRB__GFX10CORE = 0x000000d7, - RMI_PERF_SEL_UTC_POP_RTS_RTR__GFX10CORE = 0x000000d8, - RMI_PERF_SEL_UTC_POP_RTSB_RTR__GFX10CORE = 0x000000d9, - RMI_PERF_SEL_UTC_POP_RTS_RTRB__GFX10CORE = 0x000000da, - RMI_PERF_SEL_UTC_POP_RTSB_RTRB__GFX10CORE = 0x000000db, - RMI_PERF_SEL_POP_XNACK_RTS_RTR__GFX10CORE = 0x000000dc, - RMI_PERF_SEL_POP_XNACK_RTSB_RTR__GFX10CORE = 0x000000dd, - RMI_PERF_SEL_POP_XNACK_RTS_RTRB__GFX10CORE = 0x000000de, - RMI_PERF_SEL_POP_XNACK_RTSB_RTRB__GFX10CORE = 0x000000df, - RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTR__GFX10CORE = 0x000000e0, - RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTR__GFX10CORE = 0x000000e1, - RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTRB__GFX10CORE = 0x000000e2, - RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTRB__GFX10CORE = 0x000000e3, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTR__GFX10CORE = 0x000000e4, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTR__GFX10CORE = 0x000000e5, - RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTRB__GFX10CORE = 0x000000e6, - 
RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTRB__GFX10CORE = 0x000000e7, - RMI_PERF_SEL_SKID_FIFO_IN_RTS__GFX10CORE = 0x000000e8, - RMI_PERF_SEL_SKID_FIFO_IN_RTSB__GFX10CORE = 0x000000e9, - RMI_PERF_SEL_SKID_FIFO_OUT_RTS__GFX10CORE = 0x000000ea, - RMI_PERF_SEL_SKID_FIFO_OUT_RTSB__GFX10CORE = 0x000000eb, - RMI_PERF_SEL_XBAR_PROBEGEN_READ_RTS_RTR__GFX10CORE = 0x000000ec, - RMI_PERF_SEL_XBAR_PROBEGEN_WRITE_RTS_RTR__GFX10CORE = 0x000000ed, - RMI_PERF_SEL_XBAR_PROBEGEN_IN0_RTS_RTR__GFX10CORE = 0x000000ee, - RMI_PERF_SEL_XBAR_PROBEGEN_IN1_RTS_RTR__GFX10CORE = 0x000000ef, - RMI_PERF_SEL_XBAR_PROBEGEN_CB_RTS_RTR__GFX10CORE = 0x000000f0, - RMI_PERF_SEL_XBAR_PROBEGEN_DB_RTS_RTR__GFX10CORE = 0x000000f1, - RMI_PERF_SEL_REORDER_FIFO_REQ__GFX10CORE = 0x000000f2, - RMI_PERF_SEL_REORDER_FIFO_BUSY__GFX10CORE = 0x000000f3, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_ALL_CID__GFX10CORE = 0x000000f4, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID0__GFX10CORE = 0x000000f5, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID1__GFX10CORE = 0x000000f6, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID2__GFX10CORE = 0x000000f7, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3__GFX10CORE = 0x000000f8, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID4__GFX10CORE = 0x000000f9, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID5__GFX10CORE = 0x000000fa, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID6__GFX10CORE = 0x000000fb, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID7__GFX10CORE = 0x000000fc, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK0__GFX10CORE = 0x000000fd, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK1__GFX10CORE = 0x000000fe, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK2__GFX10CORE = 0x000000ff, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX10CORE = 0x00000100, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE = 0x00000101, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - RMI_PERF_SEL_RB_RMI_WRREQ_ALL_CID__GFX11 = 0x00000008, - RMI_PERF_SEL_RB_RMI_WRREQ_TO_WRRET_BUSY__GFX11 = 0x00000009, - RMI_PERF_SEL_RB_RMI_WRREQ_CID0__GFX11 = 0x0000000a, - 
RMI_PERF_SEL_RB_RMI_WRREQ_CID1__GFX11 = 0x0000000b, - RMI_PERF_SEL_RB_RMI_WRREQ_CID2__GFX11 = 0x0000000c, - RMI_PERF_SEL_RB_RMI_WRREQ_CID3__GFX11 = 0x0000000d, - RMI_PERF_SEL_RB_RMI_WRREQ_CID4__GFX11 = 0x0000000e, - RMI_PERF_SEL_RB_RMI_WRREQ_CID5__GFX11 = 0x0000000f, - RMI_PERF_SEL_RB_RMI_WRREQ_CID6__GFX11 = 0x00000010, - RMI_PERF_SEL_RB_RMI_WRREQ_CID7__GFX11 = 0x00000011, - RMI_PERF_SEL_RB_RMI_32BWRREQ_INFLIGHT_ALL_ORONE_CID__GFX11 = 0x00000012, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_LENGTH_ALL_ORONE_CID__GFX11 = 0x00000013, - RMI_PERF_SEL_RB_RMI_WRREQ_BURST_ALL_ORONE_CID__GFX11 = 0x00000014, - RMI_PERF_SEL_RB_RMI_WRREQ_RESIDENCY__GFX11 = 0x00000015, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_ALL_CID__GFX11 = 0x00000016, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID0__GFX11 = 0x00000017, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID1__GFX11 = 0x00000018, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID2__GFX11 = 0x00000019, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID3__GFX11 = 0x0000001a, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID4__GFX11 = 0x0000001b, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID5__GFX11 = 0x0000001c, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID6__GFX11 = 0x0000001d, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID7__GFX11 = 0x0000001e, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK0__GFX11 = 0x0000001f, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK1__GFX11 = 0x00000020, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK2__GFX11 = 0x00000021, - RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK3__GFX11 = 0x00000022, - RMI_PERF_SEL_RB_RMI_32BRDREQ_ALL_CID__GFX11 = 0x00000023, - RMI_PERF_SEL_RB_RMI_RDREQ_ALL_CID__GFX11 = 0x00000024, - RMI_PERF_SEL_RB_RMI_RDREQ_TO_RDRET_BUSY__GFX11 = 0x00000025, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID0__GFX11 = 0x00000026, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID1__GFX11 = 0x00000027, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID2__GFX11 = 0x00000028, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID3__GFX11 = 0x00000029, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID4__GFX11 = 0x0000002a, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID5__GFX11 = 0x0000002b, - 
RMI_PERF_SEL_RB_RMI_32BRDREQ_CID6__GFX11 = 0x0000002c, - RMI_PERF_SEL_RB_RMI_32BRDREQ_CID7__GFX11 = 0x0000002d, - RMI_PERF_SEL_RB_RMI_RDREQ_CID0__GFX11 = 0x0000002e, - RMI_PERF_SEL_RB_RMI_RDREQ_CID1__GFX11 = 0x0000002f, - RMI_PERF_SEL_RB_RMI_RDREQ_CID2__GFX11 = 0x00000030, - RMI_PERF_SEL_RB_RMI_RDREQ_CID3__GFX11 = 0x00000031, - RMI_PERF_SEL_RB_RMI_RDREQ_CID4__GFX11 = 0x00000032, - RMI_PERF_SEL_RB_RMI_RDREQ_CID5__GFX11 = 0x00000033, - RMI_PERF_SEL_RB_RMI_RDREQ_CID6__GFX11 = 0x00000034, - RMI_PERF_SEL_RB_RMI_RDREQ_CID7__GFX11 = 0x00000035, - RMI_PERF_SEL_RB_RMI_32BRDREQ_INFLIGHT_ALL_ORONE_CID__GFX11 = 0x00000036, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_LENGTH_ALL_ORONE_CID__GFX11 = 0x00000037, - RMI_PERF_SEL_RB_RMI_RDREQ_BURST_ALL_ORONE_CID__GFX11 = 0x00000038, - RMI_PERF_SEL_RB_RMI_RDREQ_RESIDENCY__GFX11 = 0x00000039, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_ALL_CID__GFX11 = 0x0000003a, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID0__GFX11 = 0x0000003b, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID1__GFX11 = 0x0000003c, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID2__GFX11 = 0x0000003d, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID3__GFX11 = 0x0000003e, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID4__GFX11 = 0x0000003f, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID5__GFX11 = 0x00000040, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID6__GFX11 = 0x00000041, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID7__GFX11 = 0x00000042, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK0__GFX11 = 0x00000043, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK1__GFX11 = 0x00000044, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK2__GFX11 = 0x00000045, - RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK3__GFX11 = 0x00000046, - RMI_PERF_SEL_RB_RMI_WR_FIFO_MAX__GFX11 = 0x00000047, - RMI_PERF_SEL_RB_RMI_WR_FIFO_EMPTY__GFX11 = 0x00000048, - RMI_PERF_SEL_RB_RMI_WR_IDLE__GFX11 = 0x00000049, - RMI_PERF_SEL_RB_RMI_WR_STARVE__GFX11 = 0x0000004a, - RMI_PERF_SEL_RB_RMI_WR_STALL__GFX11 = 0x0000004b, - RMI_PERF_SEL_RB_RMI_WR_BUSY__GFX11 = 0x0000004c, - 
RMI_PERF_SEL_RB_RMI_WR_INTF_BUSY__GFX11 = 0x0000004d, - RMI_PERF_SEL_RB_RMI_RD_FIFO_MAX__GFX11 = 0x0000004e, - RMI_PERF_SEL_RB_RMI_RD_FIFO_EMPTY__GFX11 = 0x0000004f, - RMI_PERF_SEL_RB_RMI_RD_IDLE__GFX11 = 0x00000050, - RMI_PERF_SEL_RB_RMI_RD_STARVE__GFX11 = 0x00000051, - RMI_PERF_SEL_RB_RMI_RD_STALL__GFX11 = 0x00000052, - RMI_PERF_SEL_RB_RMI_RD_BUSY__GFX11 = 0x00000053, - RMI_PERF_SEL_RB_RMI_RD_INTF_BUSY__GFX11 = 0x00000054, - RMI_PERF_SEL_RMI_TC_64BWRREQ_ALL_ORONE_CID__GFX11 = 0x00000055, - RMI_PERF_SEL_RMI_TC_64BRDREQ_ALL_ORONE_CID__GFX11 = 0x00000056, - RMI_PERF_SEL_RMI_TC_WRREQ_ALL_CID__GFX11 = 0x00000057, - RMI_PERF_SEL_RMI_TC_REQ_BUSY__GFX11 = 0x00000058, - RMI_PERF_SEL_RMI_TC_WRREQ_CID0__GFX11 = 0x00000059, - RMI_PERF_SEL_RMI_TC_WRREQ_CID1__GFX11 = 0x0000005a, - RMI_PERF_SEL_RMI_TC_WRREQ_CID2__GFX11 = 0x0000005b, - RMI_PERF_SEL_RMI_TC_WRREQ_CID3__GFX11 = 0x0000005c, - RMI_PERF_SEL_RMI_TC_WRREQ_CID4__GFX11 = 0x0000005d, - RMI_PERF_SEL_RMI_TC_WRREQ_CID5__GFX11 = 0x0000005e, - RMI_PERF_SEL_RMI_TC_WRREQ_CID6__GFX11 = 0x0000005f, - RMI_PERF_SEL_RMI_TC_WRREQ_CID7__GFX11 = 0x00000060, - RMI_PERF_SEL_RMI_TC_WRREQ_INFLIGHT_ALL_CID__GFX11 = 0x00000061, - RMI_PERF_SEL_TC_RMI_WRRET_VALID_ALL_CID__GFX11 = 0x00000062, - RMI_PERF_SEL_RMI_TC_RDREQ_ALL_CID__GFX11 = 0x00000063, - RMI_PERF_SEL_RMI_TC_RDREQ_CID0__GFX11 = 0x00000064, - RMI_PERF_SEL_RMI_TC_RDREQ_CID1__GFX11 = 0x00000065, - RMI_PERF_SEL_RMI_TC_RDREQ_CID2__GFX11 = 0x00000066, - RMI_PERF_SEL_RMI_TC_RDREQ_CID3__GFX11 = 0x00000067, - RMI_PERF_SEL_RMI_TC_RDREQ_CID4__GFX11 = 0x00000068, - RMI_PERF_SEL_RMI_TC_RDREQ_CID5__GFX11 = 0x00000069, - RMI_PERF_SEL_RMI_TC_RDREQ_CID6__GFX11 = 0x0000006a, - RMI_PERF_SEL_RMI_TC_RDREQ_CID7__GFX11 = 0x0000006b, - RMI_PERF_SEL_RMI_TC_STALL_RDREQ__GFX11 = 0x0000006c, - RMI_PERF_SEL_RMI_TC_STALL_WRREQ__GFX11 = 0x0000006d, - RMI_PERF_SEL_RMI_TC_STALL_ALLREQ__GFX11 = 0x0000006e, - RMI_PERF_SEL_RMI_TC_CREDIT_FULL_NO_PENDING_SEND__GFX11 = 0x0000006f, - 
RMI_PERF_SEL_RMI_TC_CREDIT_ZERO_PENDING_SEND__GFX11 = 0x00000070, - RMI_PERF_SEL_RMI_TC_RDREQ_INFLIGHT_ALL_CID__GFX11 = 0x00000071, - RMI_PERF_SEL_TC_RMI_RDRET_VALID_ALL_CID__GFX11 = 0x00000072, - RMI_PERF_SEL_TCIW_INFLIGHT_COUNT__GFX11 = 0x00000073, - RMI_PERF_SEL_TCIW_REQ__GFX11 = 0x00000074, - RMI_PERF_SEL_TCIW_BUSY__GFX11 = 0x00000075, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTR__GFX11 = 0x00000076, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTR__GFX11 = 0x00000077, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTRB__GFX11 = 0x00000078, - RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTRB__GFX11 = 0x00000079, - RMI_PERF_SEL_REORDER_FIFO_REQ__GFX11 = 0x0000007a, - RMI_PERF_SEL_REORDER_FIFO_BUSY__GFX11 = 0x0000007b, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_ALL_CID__GFX11 = 0x0000007c, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID0__GFX11 = 0x0000007d, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID1__GFX11 = 0x0000007e, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID2__GFX11 = 0x0000007f, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3__GFX11 = 0x00000080, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID4__GFX11 = 0x00000081, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID5__GFX11 = 0x00000082, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID6__GFX11 = 0x00000083, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID7__GFX11 = 0x00000084, - RMI_PERF_SEL_CONSUMER_PROBEGEN_READ_RTS_RTR__GFX11 = 0x00000085, - RMI_PERF_SEL_CONSUMER_PROBEGEN_WRITE_RTS_RTR__GFX11 = 0x00000086, - RMI_PERF_SEL_CONSUMER_PROBEGEN_IN0_RTS_RTR__GFX11 = 0x00000087, - RMI_PERF_SEL_CONSUMER_PROBEGEN_IN1_RTS_RTR__GFX11 = 0x00000088, - RMI_PERF_SEL_CONSUMER_PROBEGEN_CB_RTS_RTR__GFX11 = 0x00000089, - RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11 = 0x0000008a, -#endif -} RMIPerfSel; - -constexpr unsigned int MaxRMIPerfSelGfx09 = RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09; -constexpr unsigned int MaxRMIPerfSelGfx10Core = RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxRMIPerfSelGfx11 = 
RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11; -#endif - -typedef enum RoundMode { - ROUND_BY_HALF = 0x00000000, - ROUND_TRUNCATE = 0x00000001, -} RoundMode; - -typedef enum SC_PERFCNT_SEL { - SC_SRPS_WINDOW_VALID = 0x00000000, - SC_PSSW_WINDOW_VALID = 0x00000001, - SC_TPQZ_WINDOW_VALID = 0x00000002, - SC_QZQP_WINDOW_VALID = 0x00000003, - SC_TRPK_WINDOW_VALID = 0x00000004, - SC_SRPS_WINDOW_VALID_BUSY = 0x00000005, - SC_PSSW_WINDOW_VALID_BUSY = 0x00000006, - SC_TPQZ_WINDOW_VALID_BUSY = 0x00000007, - SC_QZQP_WINDOW_VALID_BUSY = 0x00000008, - SC_TRPK_WINDOW_VALID_BUSY = 0x00000009, - SC_STARVED_BY_PA = 0x0000000a, - SC_STALLED_BY_PRIMFIFO = 0x0000000b, - SC_STALLED_BY_DB_TILE = 0x0000000c, - SC_STARVED_BY_DB_TILE = 0x0000000d, - SC_STALLED_BY_TILEORDERFIFO = 0x0000000e, - SC_STALLED_BY_TILEFIFO = 0x0000000f, - SC_STALLED_BY_DB_QUAD = 0x00000010, - SC_STARVED_BY_DB_QUAD = 0x00000011, - SC_STALLED_BY_QUADFIFO = 0x00000012, - SC_STALLED_BY_BCI = 0x00000013, - SC_STALLED_BY_SPI = 0x00000014, - SC_SCISSOR_DISCARD = 0x00000015, - SC_BB_DISCARD = 0x00000016, - SC_SUPERTILE_COUNT = 0x00000017, - SC_SUPERTILE_PER_PRIM_H0 = 0x00000018, - SC_SUPERTILE_PER_PRIM_H1 = 0x00000019, - SC_SUPERTILE_PER_PRIM_H2 = 0x0000001a, - SC_SUPERTILE_PER_PRIM_H3 = 0x0000001b, - SC_SUPERTILE_PER_PRIM_H4 = 0x0000001c, - SC_SUPERTILE_PER_PRIM_H5 = 0x0000001d, - SC_SUPERTILE_PER_PRIM_H6 = 0x0000001e, - SC_SUPERTILE_PER_PRIM_H7 = 0x0000001f, - SC_SUPERTILE_PER_PRIM_H8 = 0x00000020, - SC_SUPERTILE_PER_PRIM_H9 = 0x00000021, - SC_SUPERTILE_PER_PRIM_H10 = 0x00000022, - SC_SUPERTILE_PER_PRIM_H11 = 0x00000023, - SC_SUPERTILE_PER_PRIM_H12 = 0x00000024, - SC_SUPERTILE_PER_PRIM_H13 = 0x00000025, - SC_SUPERTILE_PER_PRIM_H14 = 0x00000026, - SC_SUPERTILE_PER_PRIM_H15 = 0x00000027, - SC_SUPERTILE_PER_PRIM_H16 = 0x00000028, - SC_TILE_PER_PRIM_H0 = 0x00000029, - SC_TILE_PER_PRIM_H1 = 0x0000002a, - SC_TILE_PER_PRIM_H2 = 0x0000002b, - SC_TILE_PER_PRIM_H3 = 0x0000002c, - SC_TILE_PER_PRIM_H4 = 0x0000002d, - 
SC_TILE_PER_PRIM_H5 = 0x0000002e, - SC_TILE_PER_PRIM_H6 = 0x0000002f, - SC_TILE_PER_PRIM_H7 = 0x00000030, - SC_TILE_PER_PRIM_H8 = 0x00000031, - SC_TILE_PER_PRIM_H9 = 0x00000032, - SC_TILE_PER_PRIM_H10 = 0x00000033, - SC_TILE_PER_PRIM_H11 = 0x00000034, - SC_TILE_PER_PRIM_H12 = 0x00000035, - SC_TILE_PER_PRIM_H13 = 0x00000036, - SC_TILE_PER_PRIM_H14 = 0x00000037, - SC_TILE_PER_PRIM_H15 = 0x00000038, - SC_TILE_PER_PRIM_H16 = 0x00000039, - SC_TILE_PER_SUPERTILE_H0 = 0x0000003a, - SC_TILE_PER_SUPERTILE_H1 = 0x0000003b, - SC_TILE_PER_SUPERTILE_H2 = 0x0000003c, - SC_TILE_PER_SUPERTILE_H3 = 0x0000003d, - SC_TILE_PER_SUPERTILE_H4 = 0x0000003e, - SC_TILE_PER_SUPERTILE_H5 = 0x0000003f, - SC_TILE_PER_SUPERTILE_H6 = 0x00000040, - SC_TILE_PER_SUPERTILE_H7 = 0x00000041, - SC_TILE_PER_SUPERTILE_H8 = 0x00000042, - SC_TILE_PER_SUPERTILE_H9 = 0x00000043, - SC_TILE_PER_SUPERTILE_H10 = 0x00000044, - SC_TILE_PER_SUPERTILE_H11 = 0x00000045, - SC_TILE_PER_SUPERTILE_H12 = 0x00000046, - SC_TILE_PER_SUPERTILE_H13 = 0x00000047, - SC_TILE_PER_SUPERTILE_H14 = 0x00000048, - SC_TILE_PER_SUPERTILE_H15 = 0x00000049, - SC_TILE_PER_SUPERTILE_H16 = 0x0000004a, - SC_TILE_PICKED_H1 = 0x0000004b, - SC_QZ0_TILE_COUNT = 0x0000004f, - SC_QZ0_TILE_COVERED_COUNT = 0x00000053, - SC_QZ0_TILE_NOT_COVERED_COUNT = 0x00000057, - SC_QZ0_QUAD_PER_TILE_H0 = 0x0000005b, - SC_QZ0_QUAD_PER_TILE_H1 = 0x0000005c, - SC_QZ0_QUAD_PER_TILE_H2 = 0x0000005d, - SC_QZ0_QUAD_PER_TILE_H3 = 0x0000005e, - SC_QZ0_QUAD_PER_TILE_H4 = 0x0000005f, - SC_QZ0_QUAD_PER_TILE_H5 = 0x00000060, - SC_QZ0_QUAD_PER_TILE_H6 = 0x00000061, - SC_QZ0_QUAD_PER_TILE_H7 = 0x00000062, - SC_QZ0_QUAD_PER_TILE_H8 = 0x00000063, - SC_QZ0_QUAD_PER_TILE_H9 = 0x00000064, - SC_QZ0_QUAD_PER_TILE_H10 = 0x00000065, - SC_QZ0_QUAD_PER_TILE_H11 = 0x00000066, - SC_QZ0_QUAD_PER_TILE_H12 = 0x00000067, - SC_QZ0_QUAD_PER_TILE_H13 = 0x00000068, - SC_QZ0_QUAD_PER_TILE_H14 = 0x00000069, - SC_QZ0_QUAD_PER_TILE_H15 = 0x0000006a, - SC_QZ0_QUAD_PER_TILE_H16 = 0x0000006b, - 
SC_QZ0_QUAD_COUNT = 0x0000009f, - SC_P0_HIZ_TILE_COUNT = 0x000000a3, - SC_P0_HIZ_QUAD_PER_TILE_H0 = 0x000000a7, - SC_P0_HIZ_QUAD_PER_TILE_H1 = 0x000000a8, - SC_P0_HIZ_QUAD_PER_TILE_H2 = 0x000000a9, - SC_P0_HIZ_QUAD_PER_TILE_H3 = 0x000000aa, - SC_P0_HIZ_QUAD_PER_TILE_H4 = 0x000000ab, - SC_P0_HIZ_QUAD_PER_TILE_H5 = 0x000000ac, - SC_P0_HIZ_QUAD_PER_TILE_H6 = 0x000000ad, - SC_P0_HIZ_QUAD_PER_TILE_H7 = 0x000000ae, - SC_P0_HIZ_QUAD_PER_TILE_H8 = 0x000000af, - SC_P0_HIZ_QUAD_PER_TILE_H9 = 0x000000b0, - SC_P0_HIZ_QUAD_PER_TILE_H10 = 0x000000b1, - SC_P0_HIZ_QUAD_PER_TILE_H11 = 0x000000b2, - SC_P0_HIZ_QUAD_PER_TILE_H12 = 0x000000b3, - SC_P0_HIZ_QUAD_PER_TILE_H13 = 0x000000b4, - SC_P0_HIZ_QUAD_PER_TILE_H14 = 0x000000b5, - SC_P0_HIZ_QUAD_PER_TILE_H15 = 0x000000b6, - SC_P0_HIZ_QUAD_PER_TILE_H16 = 0x000000b7, - SC_P0_HIZ_QUAD_COUNT = 0x000000eb, - SC_P0_DETAIL_QUAD_COUNT = 0x000000ef, - SC_P0_DETAIL_QUAD_WITH_1_PIX = 0x000000f3, - SC_P0_DETAIL_QUAD_WITH_2_PIX = 0x000000f4, - SC_P0_DETAIL_QUAD_WITH_3_PIX = 0x000000f5, - SC_P0_DETAIL_QUAD_WITH_4_PIX = 0x000000f6, - SC_EARLYZ_QUAD_COUNT = 0x00000103, - SC_EARLYZ_QUAD_WITH_1_PIX = 0x00000104, - SC_EARLYZ_QUAD_WITH_2_PIX = 0x00000105, - SC_EARLYZ_QUAD_WITH_3_PIX = 0x00000106, - SC_EARLYZ_QUAD_WITH_4_PIX = 0x00000107, - SC_PKR_QUAD_PER_ROW_H1 = 0x00000108, - SC_PKR_QUAD_PER_ROW_H2 = 0x00000109, - SC_PKR_4X2_QUAD_SPLIT = 0x0000010a, - SC_PKR_4X2_FILL_QUAD = 0x0000010b, - SC_PKR_END_OF_VECTOR = 0x0000010c, - SC_PKR_CONTROL_XFER = 0x0000010d, - SC_PKR_DBHANG_FORCE_EOV = 0x0000010e, - SC_REG_SCLK_BUSY = 0x0000010f, - SC_GRP0_DYN_SCLK_BUSY = 0x00000110, - SC_GRP1_DYN_SCLK_BUSY = 0x00000111, - SC_GRP2_DYN_SCLK_BUSY = 0x00000112, - SC_GRP3_DYN_SCLK_BUSY = 0x00000113, - SC_GRP4_DYN_SCLK_BUSY = 0x00000114, - SC_PA0_SC_DATA_FIFO_RD = 0x00000115, - SC_PA0_SC_DATA_FIFO_WE = 0x00000116, - SC_PS_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000119, - SC_PS_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000011a, - SC_PS_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000011b, - 
SC_PS_ARB_STALLED_FROM_BELOW = 0x0000011c, - SC_PS_ARB_STARVED_FROM_ABOVE = 0x0000011d, - SC_PS_ARB_SC_BUSY = 0x0000011e, - SC_PS_ARB_PA_SC_BUSY = 0x0000011f, - SC_PA0_SC_EOP_WE = 0x0000012c, - SC_PA0_SC_EVENT_WE = 0x0000012e, - SC_PS_TS_EVENT_FIFO_PUSH = 0x00000157, - SC_PS_TS_EVENT_FIFO_POP = 0x00000158, - SC_PS_CTX_DONE_FIFO_PUSH = 0x00000159, - SC_PS_CTX_DONE_FIFO_POP = 0x0000015a, - SC_PA0_SC_NULL_WE = 0x0000015d, - SC_PA0_SC_NULL_DEALLOC_WE = 0x0000015e, - SC_PA0_SC_DATA_FIFO_EOP_RD = 0x00000160, - SC_PS_PA0_SC_FIFO_EMPTY = 0x00000175, - SC_PS_PA0_SC_FIFO_FULL = 0x00000176, - SC_BUSY_CNT_NOT_ZERO = 0x00000182, - SC_BM_BUSY = 0x00000183, - SC_BACKEND_BUSY = 0x00000184, - SC_SCF_SCB_INTERFACE_BUSY = 0x00000185, - SC_SCB_BUSY = 0x00000186, - SC_STARVED_BY_PA_WITH_UNSELECTED_PA_NOT_EMPTY = 0x00000187, - SC_STARVED_BY_PA_WITH_UNSELECTED_PA_FULL = 0x00000188, - SC_PBB_BIN_HIST_NUM_PRIMS = 0x00000189, - SC_PBB_BATCH_HIST_NUM_PRIMS = 0x0000018a, - SC_PBB_BIN_HIST_NUM_CONTEXTS = 0x0000018b, - SC_PBB_BATCH_HIST_NUM_CONTEXTS = 0x0000018c, - SC_PBB_BIN_HIST_NUM_PERSISTENT_STATES = 0x0000018d, - SC_PBB_BATCH_HIST_NUM_PERSISTENT_STATES = 0x0000018e, - SC_PBB_BATCH_HIST_NUM_PS_WAVE_BREAKS = 0x0000018f, - SC_PBB_BATCH_HIST_NUM_TRIV_REJECTED_PRIMS = 0x00000190, - SC_PBB_BATCH_HIST_NUM_ROWS_PER_PRIM = 0x00000191, - SC_PBB_BATCH_HIST_NUM_COLUMNS_PER_ROW = 0x00000192, - SC_PBB_BUSY = 0x00000193, - SC_PBB_BUSY_AND_NO_SENDS = 0x00000194, - SC_PBB_STALLS_PA_DUE_TO_NO_TILES = 0x00000195, - SC_PBB_NUM_BINS = 0x00000196, - SC_PBB_END_OF_BIN = 0x00000197, - SC_PBB_END_OF_BATCH = 0x00000198, - SC_PBB_PRIMBIN_PROCESSED = 0x00000199, - SC_PBB_PRIM_ADDED_TO_BATCH = 0x0000019a, - SC_PBB_NONBINNED_PRIM = 0x0000019b, - SC_PBB_TOTAL_REAL_PRIMS_OUT_OF_PBB = 0x0000019c, - SC_PBB_TOTAL_NULL_PRIMS_OUT_OF_PBB = 0x0000019d, - SC_PBB_IDLE_CLK_DUE_TO_ROW_TO_COLUMN_TRANSITION = 0x0000019e, - SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_ROW = 0x0000019f, - SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_COLUMN = 
0x000001a0, - SC_PBB_BATCH_BREAK_DUE_TO_PERSISTENT_STATE = 0x000001a1, - SC_PBB_BATCH_BREAK_DUE_TO_CONTEXT_STATE = 0x000001a2, - SC_PBB_BATCH_BREAK_DUE_TO_PRIM = 0x000001a3, - SC_PBB_BATCH_BREAK_DUE_TO_PC_STORAGE = 0x000001a4, - SC_PBB_BATCH_BREAK_DUE_TO_EVENT = 0x000001a5, - SC_PBB_BATCH_BREAK_DUE_TO_FPOV_LIMIT = 0x000001a6, - SC_GRP5_DYN_SCLK_BUSY = 0x000001b5, - SC_GRP6_DYN_SCLK_BUSY = 0x000001b6, - SC_GRP7_DYN_SCLK_BUSY = 0x000001b7, - SC_GRP8_DYN_SCLK_BUSY = 0x000001b8, - SC_GRP9_DYN_SCLK_BUSY = 0x000001b9, - SC_PS_TO_BE_SCLK_GATE_STALL = 0x000001ba, - SC_PA_TO_PBB_SCLK_GATE_STALL_STALL = 0x000001bb, - SC_PK_BUSY = 0x000001bc, - SC_PK_MAX_DEALLOC_FORCE_EOV = 0x000001bd, - SC_PK_DEALLOC_WAVE_BREAK = 0x000001be, - SC_SPI_SEND = 0x000001bf, - SC_SPI_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x000001c0, - SC_SPI_CREDIT_AT_MAX = 0x000001c1, - SC_SPI_CREDIT_AT_MAX_NO_PENDING_SEND = 0x000001c2, - SC_BCI_SEND = 0x000001c3, - SC_BCI_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x000001c4, - SC_BCI_CREDIT_AT_MAX = 0x000001c5, - SC_BCI_CREDIT_AT_MAX_NO_PENDING_SEND = 0x000001c6, - SC_SPIBC_FULL_FREEZE = 0x000001c7, - SC_PW_BM_PASS_EMPTY_PRIM = 0x000001c8, - SC_SUPERTILE_COUNT_EXCLUDE_PASS_EMPTY_PRIM = 0x000001c9, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H0 = 0x000001ca, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H1 = 0x000001cb, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H2 = 0x000001cc, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H3 = 0x000001cd, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H4 = 0x000001ce, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H5 = 0x000001cf, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H6 = 0x000001d0, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H7 = 0x000001d1, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H8 = 0x000001d2, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H9 = 0x000001d3, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H10 = 0x000001d4, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H11 = 0x000001d5, - 
SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H12 = 0x000001d6, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H13 = 0x000001d7, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H14 = 0x000001d8, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H15 = 0x000001d9, - SC_SUPERTILE_PER_PRIM_EXCLUDE_PASS_EMPTY_PRIM_H16 = 0x000001da, - SC_DB0_TILE_INTERFACE_BUSY = 0x000001db, - SC_DB0_TILE_INTERFACE_SEND = 0x000001dc, - SC_DB0_TILE_INTERFACE_SEND_EVENT = 0x000001dd, - SC_DB0_TILE_INTERFACE_CREDIT_AT_ZERO_WITH_PENDING_SEND = 0x000001e0, - SC_DB0_TILE_INTERFACE_CREDIT_AT_MAX = 0x000001e1, - SC_DB0_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND = 0x000001e2, -#if CHIP_HDR_PHOENIX1 - SC_VRC_REPROBE_XFR__APU11 = 0x00000299, - SC_VRC_REPROBE_FULL__APU11 = 0x0000029a, -#endif - SC_SC_PS_ENG_MULTICYCLE_BUBBLE__GFX09 = 0x0000013d, - SC_SC_SPI_DEALLOC_0_0__GFX09 = 0x00000146, - SC_SC_SPI_DEALLOC_0_1__GFX09 = 0x00000147, - SC_SC_SPI_DEALLOC_0_2__GFX09 = 0x00000148, - SC_SC_SPI_DEALLOC_1_0__GFX09 = 0x00000149, - SC_SC_SPI_DEALLOC_1_1__GFX09 = 0x0000014a, - SC_SC_SPI_DEALLOC_1_2__GFX09 = 0x0000014b, - SC_SC_SPI_DEALLOC_2_0__GFX09 = 0x0000014c, - SC_SC_SPI_DEALLOC_2_1__GFX09 = 0x0000014d, - SC_SC_SPI_DEALLOC_2_2__GFX09 = 0x0000014e, - SC_SC_SPI_DEALLOC_3_0__GFX09 = 0x0000014f, - SC_SC_SPI_DEALLOC_3_1__GFX09 = 0x00000150, - SC_SC_SPI_DEALLOC_3_2__GFX09 = 0x00000151, - SC_SC_SPI_FPOV_0__GFX09 = 0x00000152, - SC_SC_SPI_FPOV_1__GFX09 = 0x00000153, - SC_SC_SPI_FPOV_2__GFX09 = 0x00000154, - SC_SC_SPI_FPOV_3__GFX09 = 0x00000155, - SC_SC_SPI_EVENT__GFX09 = 0x00000156, - SC_PA0_PS_DATA_FULL_MINUS3__GFX09 = 0x00000177, - SC_PA1_PS_DATA_FULL_MINUS3__GFX09 = 0x0000017a, - SC_PA2_PS_DATA_FULL_MINUS3__GFX09 = 0x0000017d, - SC_PA3_PS_DATA_FULL_MINUS3__GFX09 = 0x00000180, - SC_POPS_INTRA_WAVE_OVERLAPS__GFX09 = 0x000001a7, - SC_POPS_FORCE_EOV__GFX09 = 0x000001a8, - SC_PKR_QUAD_OVLP_NOT_FOUND_IN_WAVE_TABLE_AND_WAVES_SINCE_OVLP_SET_TO_MAX__GFX09 = 0x000001a9, - 
SC_PKR_QUAD_OVLP_NOT_FOUND_IN_WAVE_TABLE_AND_NO_CHANGE_TO_WAVES_SINCE_OVLP__GFX09 = 0x000001aa, - SC_PKR_QUAD_OVLP_FOUND_IN_WAVE_TABLE__GFX09 = 0x000001ab, - SC_FULL_FULL_QUAD__GFX09 = 0x000001ac, - SC_FULL_HALF_QUAD__GFX09 = 0x000001ad, - SC_FULL_QTR_QUAD__GFX09 = 0x000001ae, - SC_HALF_FULL_QUAD__GFX09 = 0x000001af, - SC_HALF_HALF_QUAD__GFX09 = 0x000001b0, - SC_HALF_QTR_QUAD__GFX09 = 0x000001b1, - SC_QTR_FULL_QUAD__GFX09 = 0x000001b2, - SC_QTR_HALF_QUAD__GFX09 = 0x000001b3, - SC_QTR_QTR_QUAD__GFX09 = 0x000001b4, - SC_DB0_TILE_INTERFACE_SEND_SOP_ONLY_EVENT__GFX09 = 0x000001de, - SC_DB0_TILE_INTERFACE_SEND_SOP__GFX09 = 0x000001df, - SC_DB1_TILE_INTERFACE_SEND_SOP_ONLY_EVENT__GFX09 = 0x000001e6, - SC_DB1_TILE_INTERFACE_SEND_SOP__GFX09 = 0x000001e7, - SC_TILE_PICKED_H2__GFX09_10 = 0x0000004c, - SC_TILE_PICKED_H3__GFX09_10 = 0x0000004d, - SC_TILE_PICKED_H4__GFX09_10 = 0x0000004e, - SC_QZ1_TILE_COUNT__GFX09_10 = 0x00000050, - SC_QZ2_TILE_COUNT__GFX09_10 = 0x00000051, - SC_QZ3_TILE_COUNT__GFX09_10 = 0x00000052, - SC_QZ1_TILE_COVERED_COUNT__GFX09_10 = 0x00000054, - SC_QZ2_TILE_COVERED_COUNT__GFX09_10 = 0x00000055, - SC_QZ3_TILE_COVERED_COUNT__GFX09_10 = 0x00000056, - SC_QZ1_TILE_NOT_COVERED_COUNT__GFX09_10 = 0x00000058, - SC_QZ2_TILE_NOT_COVERED_COUNT__GFX09_10 = 0x00000059, - SC_QZ3_TILE_NOT_COVERED_COUNT__GFX09_10 = 0x0000005a, - SC_QZ1_QUAD_PER_TILE_H0__GFX09_10 = 0x0000006c, - SC_QZ1_QUAD_PER_TILE_H1__GFX09_10 = 0x0000006d, - SC_QZ1_QUAD_PER_TILE_H2__GFX09_10 = 0x0000006e, - SC_QZ1_QUAD_PER_TILE_H3__GFX09_10 = 0x0000006f, - SC_QZ1_QUAD_PER_TILE_H4__GFX09_10 = 0x00000070, - SC_QZ1_QUAD_PER_TILE_H5__GFX09_10 = 0x00000071, - SC_QZ1_QUAD_PER_TILE_H6__GFX09_10 = 0x00000072, - SC_QZ1_QUAD_PER_TILE_H7__GFX09_10 = 0x00000073, - SC_QZ1_QUAD_PER_TILE_H8__GFX09_10 = 0x00000074, - SC_QZ1_QUAD_PER_TILE_H9__GFX09_10 = 0x00000075, - SC_QZ1_QUAD_PER_TILE_H10__GFX09_10 = 0x00000076, - SC_QZ1_QUAD_PER_TILE_H11__GFX09_10 = 0x00000077, - SC_QZ1_QUAD_PER_TILE_H12__GFX09_10 = 0x00000078, - 
SC_QZ1_QUAD_PER_TILE_H13__GFX09_10 = 0x00000079, - SC_QZ1_QUAD_PER_TILE_H14__GFX09_10 = 0x0000007a, - SC_QZ1_QUAD_PER_TILE_H15__GFX09_10 = 0x0000007b, - SC_QZ1_QUAD_PER_TILE_H16__GFX09_10 = 0x0000007c, - SC_QZ2_QUAD_PER_TILE_H0__GFX09_10 = 0x0000007d, - SC_QZ2_QUAD_PER_TILE_H1__GFX09_10 = 0x0000007e, - SC_QZ2_QUAD_PER_TILE_H2__GFX09_10 = 0x0000007f, - SC_QZ2_QUAD_PER_TILE_H3__GFX09_10 = 0x00000080, - SC_QZ2_QUAD_PER_TILE_H4__GFX09_10 = 0x00000081, - SC_QZ2_QUAD_PER_TILE_H5__GFX09_10 = 0x00000082, - SC_QZ2_QUAD_PER_TILE_H6__GFX09_10 = 0x00000083, - SC_QZ2_QUAD_PER_TILE_H7__GFX09_10 = 0x00000084, - SC_QZ2_QUAD_PER_TILE_H8__GFX09_10 = 0x00000085, - SC_QZ2_QUAD_PER_TILE_H9__GFX09_10 = 0x00000086, - SC_QZ2_QUAD_PER_TILE_H10__GFX09_10 = 0x00000087, - SC_QZ2_QUAD_PER_TILE_H11__GFX09_10 = 0x00000088, - SC_QZ2_QUAD_PER_TILE_H12__GFX09_10 = 0x00000089, - SC_QZ2_QUAD_PER_TILE_H13__GFX09_10 = 0x0000008a, - SC_QZ2_QUAD_PER_TILE_H14__GFX09_10 = 0x0000008b, - SC_QZ2_QUAD_PER_TILE_H15__GFX09_10 = 0x0000008c, - SC_QZ2_QUAD_PER_TILE_H16__GFX09_10 = 0x0000008d, - SC_QZ3_QUAD_PER_TILE_H0__GFX09_10 = 0x0000008e, - SC_QZ3_QUAD_PER_TILE_H1__GFX09_10 = 0x0000008f, - SC_QZ3_QUAD_PER_TILE_H2__GFX09_10 = 0x00000090, - SC_QZ3_QUAD_PER_TILE_H3__GFX09_10 = 0x00000091, - SC_QZ3_QUAD_PER_TILE_H4__GFX09_10 = 0x00000092, - SC_QZ3_QUAD_PER_TILE_H5__GFX09_10 = 0x00000093, - SC_QZ3_QUAD_PER_TILE_H6__GFX09_10 = 0x00000094, - SC_QZ3_QUAD_PER_TILE_H7__GFX09_10 = 0x00000095, - SC_QZ3_QUAD_PER_TILE_H8__GFX09_10 = 0x00000096, - SC_QZ3_QUAD_PER_TILE_H9__GFX09_10 = 0x00000097, - SC_QZ3_QUAD_PER_TILE_H10__GFX09_10 = 0x00000098, - SC_QZ3_QUAD_PER_TILE_H11__GFX09_10 = 0x00000099, - SC_QZ3_QUAD_PER_TILE_H12__GFX09_10 = 0x0000009a, - SC_QZ3_QUAD_PER_TILE_H13__GFX09_10 = 0x0000009b, - SC_QZ3_QUAD_PER_TILE_H14__GFX09_10 = 0x0000009c, - SC_QZ3_QUAD_PER_TILE_H15__GFX09_10 = 0x0000009d, - SC_QZ3_QUAD_PER_TILE_H16__GFX09_10 = 0x0000009e, - SC_QZ1_QUAD_COUNT__GFX09_10 = 0x000000a0, - SC_QZ2_QUAD_COUNT__GFX09_10 = 
0x000000a1, - SC_QZ3_QUAD_COUNT__GFX09_10 = 0x000000a2, - SC_P1_HIZ_TILE_COUNT__GFX09_10 = 0x000000a4, - SC_P2_HIZ_TILE_COUNT__GFX09_10 = 0x000000a5, - SC_P3_HIZ_TILE_COUNT__GFX09_10 = 0x000000a6, - SC_P1_HIZ_QUAD_PER_TILE_H0__GFX09_10 = 0x000000b8, - SC_P1_HIZ_QUAD_PER_TILE_H1__GFX09_10 = 0x000000b9, - SC_P1_HIZ_QUAD_PER_TILE_H2__GFX09_10 = 0x000000ba, - SC_P1_HIZ_QUAD_PER_TILE_H3__GFX09_10 = 0x000000bb, - SC_P1_HIZ_QUAD_PER_TILE_H4__GFX09_10 = 0x000000bc, - SC_P1_HIZ_QUAD_PER_TILE_H5__GFX09_10 = 0x000000bd, - SC_P1_HIZ_QUAD_PER_TILE_H6__GFX09_10 = 0x000000be, - SC_P1_HIZ_QUAD_PER_TILE_H7__GFX09_10 = 0x000000bf, - SC_P1_HIZ_QUAD_PER_TILE_H8__GFX09_10 = 0x000000c0, - SC_P1_HIZ_QUAD_PER_TILE_H9__GFX09_10 = 0x000000c1, - SC_P1_HIZ_QUAD_PER_TILE_H10__GFX09_10 = 0x000000c2, - SC_P1_HIZ_QUAD_PER_TILE_H11__GFX09_10 = 0x000000c3, - SC_P1_HIZ_QUAD_PER_TILE_H12__GFX09_10 = 0x000000c4, - SC_P1_HIZ_QUAD_PER_TILE_H13__GFX09_10 = 0x000000c5, - SC_P1_HIZ_QUAD_PER_TILE_H14__GFX09_10 = 0x000000c6, - SC_P1_HIZ_QUAD_PER_TILE_H15__GFX09_10 = 0x000000c7, - SC_P1_HIZ_QUAD_PER_TILE_H16__GFX09_10 = 0x000000c8, - SC_P2_HIZ_QUAD_PER_TILE_H0__GFX09_10 = 0x000000c9, - SC_P2_HIZ_QUAD_PER_TILE_H1__GFX09_10 = 0x000000ca, - SC_P2_HIZ_QUAD_PER_TILE_H2__GFX09_10 = 0x000000cb, - SC_P2_HIZ_QUAD_PER_TILE_H3__GFX09_10 = 0x000000cc, - SC_P2_HIZ_QUAD_PER_TILE_H4__GFX09_10 = 0x000000cd, - SC_P2_HIZ_QUAD_PER_TILE_H5__GFX09_10 = 0x000000ce, - SC_P2_HIZ_QUAD_PER_TILE_H6__GFX09_10 = 0x000000cf, - SC_P2_HIZ_QUAD_PER_TILE_H7__GFX09_10 = 0x000000d0, - SC_P2_HIZ_QUAD_PER_TILE_H8__GFX09_10 = 0x000000d1, - SC_P2_HIZ_QUAD_PER_TILE_H9__GFX09_10 = 0x000000d2, - SC_P2_HIZ_QUAD_PER_TILE_H10__GFX09_10 = 0x000000d3, - SC_P2_HIZ_QUAD_PER_TILE_H11__GFX09_10 = 0x000000d4, - SC_P2_HIZ_QUAD_PER_TILE_H12__GFX09_10 = 0x000000d5, - SC_P2_HIZ_QUAD_PER_TILE_H13__GFX09_10 = 0x000000d6, - SC_P2_HIZ_QUAD_PER_TILE_H14__GFX09_10 = 0x000000d7, - SC_P2_HIZ_QUAD_PER_TILE_H15__GFX09_10 = 0x000000d8, - SC_P2_HIZ_QUAD_PER_TILE_H16__GFX09_10 
= 0x000000d9, - SC_P3_HIZ_QUAD_PER_TILE_H0__GFX09_10 = 0x000000da, - SC_P3_HIZ_QUAD_PER_TILE_H1__GFX09_10 = 0x000000db, - SC_P3_HIZ_QUAD_PER_TILE_H2__GFX09_10 = 0x000000dc, - SC_P3_HIZ_QUAD_PER_TILE_H3__GFX09_10 = 0x000000dd, - SC_P3_HIZ_QUAD_PER_TILE_H4__GFX09_10 = 0x000000de, - SC_P3_HIZ_QUAD_PER_TILE_H5__GFX09_10 = 0x000000df, - SC_P3_HIZ_QUAD_PER_TILE_H6__GFX09_10 = 0x000000e0, - SC_P3_HIZ_QUAD_PER_TILE_H7__GFX09_10 = 0x000000e1, - SC_P3_HIZ_QUAD_PER_TILE_H8__GFX09_10 = 0x000000e2, - SC_P3_HIZ_QUAD_PER_TILE_H9__GFX09_10 = 0x000000e3, - SC_P3_HIZ_QUAD_PER_TILE_H10__GFX09_10 = 0x000000e4, - SC_P3_HIZ_QUAD_PER_TILE_H11__GFX09_10 = 0x000000e5, - SC_P3_HIZ_QUAD_PER_TILE_H12__GFX09_10 = 0x000000e6, - SC_P3_HIZ_QUAD_PER_TILE_H13__GFX09_10 = 0x000000e7, - SC_P3_HIZ_QUAD_PER_TILE_H14__GFX09_10 = 0x000000e8, - SC_P3_HIZ_QUAD_PER_TILE_H15__GFX09_10 = 0x000000e9, - SC_P3_HIZ_QUAD_PER_TILE_H16__GFX09_10 = 0x000000ea, - SC_P1_HIZ_QUAD_COUNT__GFX09_10 = 0x000000ec, - SC_P2_HIZ_QUAD_COUNT__GFX09_10 = 0x000000ed, - SC_P3_HIZ_QUAD_COUNT__GFX09_10 = 0x000000ee, - SC_P1_DETAIL_QUAD_COUNT__GFX09_10 = 0x000000f0, - SC_P2_DETAIL_QUAD_COUNT__GFX09_10 = 0x000000f1, - SC_P3_DETAIL_QUAD_COUNT__GFX09_10 = 0x000000f2, - SC_P1_DETAIL_QUAD_WITH_1_PIX__GFX09_10 = 0x000000f7, - SC_P1_DETAIL_QUAD_WITH_2_PIX__GFX09_10 = 0x000000f8, - SC_P1_DETAIL_QUAD_WITH_3_PIX__GFX09_10 = 0x000000f9, - SC_P1_DETAIL_QUAD_WITH_4_PIX__GFX09_10 = 0x000000fa, - SC_P2_DETAIL_QUAD_WITH_1_PIX__GFX09_10 = 0x000000fb, - SC_P2_DETAIL_QUAD_WITH_2_PIX__GFX09_10 = 0x000000fc, - SC_P2_DETAIL_QUAD_WITH_3_PIX__GFX09_10 = 0x000000fd, - SC_P2_DETAIL_QUAD_WITH_4_PIX__GFX09_10 = 0x000000fe, - SC_P3_DETAIL_QUAD_WITH_1_PIX__GFX09_10 = 0x000000ff, - SC_P3_DETAIL_QUAD_WITH_2_PIX__GFX09_10 = 0x00000100, - SC_P3_DETAIL_QUAD_WITH_3_PIX__GFX09_10 = 0x00000101, - SC_P3_DETAIL_QUAD_WITH_4_PIX__GFX09_10 = 0x00000102, - SC_PA1_SC_DATA_FIFO_RD__GFX09_10 = 0x00000117, - SC_PA1_SC_DATA_FIFO_WE__GFX09_10 = 0x00000118, - 
SC_PA2_SC_DATA_FIFO_RD__GFX09_10 = 0x00000120, - SC_PA2_SC_DATA_FIFO_WE__GFX09_10 = 0x00000121, - SC_PA3_SC_DATA_FIFO_RD__GFX09_10 = 0x00000122, - SC_PA3_SC_DATA_FIFO_WE__GFX09_10 = 0x00000123, - SC_PA_SC_DEALLOC_0_0_WE__GFX09_10 = 0x00000124, - SC_PA_SC_DEALLOC_0_1_WE__GFX09_10 = 0x00000125, - SC_PA_SC_DEALLOC_1_0_WE__GFX09_10 = 0x00000126, - SC_PA_SC_DEALLOC_1_1_WE__GFX09_10 = 0x00000127, - SC_PA_SC_DEALLOC_2_0_WE__GFX09_10 = 0x00000128, - SC_PA_SC_DEALLOC_2_1_WE__GFX09_10 = 0x00000129, - SC_PA_SC_DEALLOC_3_0_WE__GFX09_10 = 0x0000012a, - SC_PA_SC_DEALLOC_3_1_WE__GFX09_10 = 0x0000012b, - SC_PA0_SC_EOPG_WE__GFX09_10 = 0x0000012d, - SC_PA1_SC_EOP_WE__GFX09_10 = 0x0000012f, - SC_PA1_SC_EOPG_WE__GFX09_10 = 0x00000130, - SC_PA1_SC_EVENT_WE__GFX09_10 = 0x00000131, - SC_PA2_SC_EOP_WE__GFX09_10 = 0x00000132, - SC_PA2_SC_EOPG_WE__GFX09_10 = 0x00000133, - SC_PA2_SC_EVENT_WE__GFX09_10 = 0x00000134, - SC_PA3_SC_EOP_WE__GFX09_10 = 0x00000135, - SC_PA3_SC_EOPG_WE__GFX09_10 = 0x00000136, - SC_PA3_SC_EVENT_WE__GFX09_10 = 0x00000137, - SC_PS_ARB_OOO_THRESHOLD_SWITCH_TO_DESIRED_FIFO__GFX09_10 = 0x00000138, - SC_PS_ARB_OOO_FIFO_EMPTY_SWITCH__GFX09_10 = 0x00000139, - SC_PS_ARB_NULL_PRIM_BUBBLE_POP__GFX09_10 = 0x0000013a, - SC_PS_ARB_EOP_POP_SYNC_POP__GFX09_10 = 0x0000013b, - SC_PS_ARB_EVENT_SYNC_POP__GFX09_10 = 0x0000013c, - SC_PA0_SC_FPOV_WE__GFX09_10 = 0x0000013e, - SC_PA1_SC_FPOV_WE__GFX09_10 = 0x0000013f, - SC_PA2_SC_FPOV_WE__GFX09_10 = 0x00000140, - SC_PA3_SC_FPOV_WE__GFX09_10 = 0x00000141, - SC_PA0_SC_LPOV_WE__GFX09_10 = 0x00000142, - SC_PA1_SC_LPOV_WE__GFX09_10 = 0x00000143, - SC_PA2_SC_LPOV_WE__GFX09_10 = 0x00000144, - SC_PA3_SC_LPOV_WE__GFX09_10 = 0x00000145, - SC_MULTICYCLE_BUBBLE_FREEZE__GFX09_10 = 0x0000015b, - SC_EOP_SYNC_WINDOW__GFX09_10 = 0x0000015c, - SC_PA0_SC_DATA_FIFO_EOPG_RD__GFX09_10 = 0x0000015f, - SC_PA0_SC_DEALLOC_0_RD__GFX09_10 = 0x00000161, - SC_PA0_SC_DEALLOC_1_RD__GFX09_10 = 0x00000162, - SC_PA1_SC_DATA_FIFO_EOPG_RD__GFX09_10 = 0x00000163, - 
SC_PA1_SC_DATA_FIFO_EOP_RD__GFX09_10 = 0x00000164, - SC_PA1_SC_DEALLOC_0_RD__GFX09_10 = 0x00000165, - SC_PA1_SC_DEALLOC_1_RD__GFX09_10 = 0x00000166, - SC_PA1_SC_NULL_WE__GFX09_10 = 0x00000167, - SC_PA1_SC_NULL_DEALLOC_WE__GFX09_10 = 0x00000168, - SC_PA2_SC_DATA_FIFO_EOPG_RD__GFX09_10 = 0x00000169, - SC_PA2_SC_DATA_FIFO_EOP_RD__GFX09_10 = 0x0000016a, - SC_PA2_SC_DEALLOC_0_RD__GFX09_10 = 0x0000016b, - SC_PA2_SC_DEALLOC_1_RD__GFX09_10 = 0x0000016c, - SC_PA2_SC_NULL_WE__GFX09_10 = 0x0000016d, - SC_PA2_SC_NULL_DEALLOC_WE__GFX09_10 = 0x0000016e, - SC_PA3_SC_DATA_FIFO_EOPG_RD__GFX09_10 = 0x0000016f, - SC_PA3_SC_DATA_FIFO_EOP_RD__GFX09_10 = 0x00000170, - SC_PA3_SC_DEALLOC_0_RD__GFX09_10 = 0x00000171, - SC_PA3_SC_DEALLOC_1_RD__GFX09_10 = 0x00000172, - SC_PA3_SC_NULL_WE__GFX09_10 = 0x00000173, - SC_PA3_SC_NULL_DEALLOC_WE__GFX09_10 = 0x00000174, - SC_PS_PA1_SC_FIFO_EMPTY__GFX09_10 = 0x00000178, - SC_PS_PA1_SC_FIFO_FULL__GFX09_10 = 0x00000179, - SC_PS_PA2_SC_FIFO_EMPTY__GFX09_10 = 0x0000017b, - SC_PS_PA2_SC_FIFO_FULL__GFX09_10 = 0x0000017c, - SC_PS_PA3_SC_FIFO_EMPTY__GFX09_10 = 0x0000017e, - SC_PS_PA3_SC_FIFO_FULL__GFX09_10 = 0x0000017f, - SC_BUSY_PROCESSING_MULTICYCLE_PRIM__GFX09_10 = 0x00000181, - SC_DB1_TILE_INTERFACE_BUSY__GFX09_10 = 0x000001e3, - SC_DB1_TILE_INTERFACE_SEND__GFX09_10 = 0x000001e4, - SC_DB1_TILE_INTERFACE_SEND_EVENT__GFX09_10 = 0x000001e5, - SC_DB1_TILE_INTERFACE_CREDIT_AT_ZERO_WITH_PENDING_SEND__GFX09_10 = 0x000001e8, - SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX__GFX09_10 = 0x000001e9, - SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND__GFX09_10 = 0x000001ea, - SC_RESERVED_0__GFX10 = 0x00000177, - SC_RESERVED_1__GFX10 = 0x0000017a, - SC_RESERVED_2__GFX10 = 0x0000017d, - SC_RESERVED_3__GFX10 = 0x00000180, - SC_DB1_QUAD_INTF_SEND__GFX10 = 0x000001fd, - SC_STALLED_BY_DB1_TILEFIFO__GFX10 = 0x000001fe, - SC_DB1_QUAD_INTF_BUSY__GFX10 = 0x000001ff, - SC_DB1_QUAD_INTF_STALLED_BY_DB__GFX10 = 0x00000200, - SC_DB1_QUAD_INTF_CREDIT_AT_MAX__GFX10 = 0x00000201, - 
SC_DB1_QUAD_INTF_IDLE__GFX10 = 0x00000202, - SC_DB1_WE_STALLED_BY_RSLT_FIFO_FULL__GFX10 = 0x0000020b, - SC_DB1_WE_TILE_MASK_RETURN_FIFO_FULL_WITH_WE_RSLT_FIFO_STALL__GFX10 = 0x0000020c, - SC_DB1_TILE_MASK_FIFO_FULL__GFX10 = 0x0000020d, - SC_PK_PM_QD1_FD_CONFLICT_WAVE_BRK_1H__GFX10 = 0x00000214, - SC_PK_PM_QD1_FORCE_PARTIAL_FOR_DEALLOC_WAVE_BRK_1H__GFX10 = 0x00000215, - SC_PK_PM_QD1_AVOID_DEALLOC_ADD_WAVE_BRK_1H__GFX10 = 0x00000216, - SC_PK_PM_LAST_AND_DEALLOC_WAVE_BRK_1H__GFX10 = 0x0000021b, - SC_SC_PS_ENG_MULTICYCLE_BUBBLE__GFX101 = 0x0000013d, - SC_SC_SPI_DEALLOC_0_0__GFX101 = 0x00000146, - SC_SC_SPI_DEALLOC_0_1__GFX101 = 0x00000147, - SC_SC_SPI_DEALLOC_0_2__GFX101 = 0x00000148, - SC_SC_SPI_DEALLOC_1_0__GFX101 = 0x00000149, - SC_SC_SPI_DEALLOC_1_1__GFX101 = 0x0000014a, - SC_SC_SPI_DEALLOC_1_2__GFX101 = 0x0000014b, - SC_SC_SPI_DEALLOC_2_0__GFX101 = 0x0000014c, - SC_SC_SPI_DEALLOC_2_1__GFX101 = 0x0000014d, - SC_SC_SPI_DEALLOC_2_2__GFX101 = 0x0000014e, - SC_SC_SPI_DEALLOC_3_0__GFX101 = 0x0000014f, - SC_SC_SPI_DEALLOC_3_1__GFX101 = 0x00000150, - SC_SC_SPI_DEALLOC_3_2__GFX101 = 0x00000151, - SC_SC_SPI_FPOV_0__GFX101 = 0x00000152, - SC_SC_SPI_FPOV_1__GFX101 = 0x00000153, - SC_SC_SPI_FPOV_2__GFX101 = 0x00000154, - SC_SC_SPI_FPOV_3__GFX101 = 0x00000155, - SC_SC_SPI_EVENT__GFX101 = 0x00000156, - SC_PBB_BATCH_BREAK_DUE_TO_PIPELINE_MODE_CHANGE__GFX103DERIVATIVE = 0x00000238, - SC_SPI_EVENT__GFX103PLUS = 0x00000156, - SC_PK_PM_VRS_RATE_X_00_Y_00_QUAD__GFX103PLUS = 0x00000228, - SC_PK_PM_VRS_RATE_X_00_Y_01_QUAD__GFX103PLUS = 0x00000229, - SC_PK_PM_VRS_RATE_X_00_Y_10_QUAD__GFX103PLUS = 0x0000022a, - SC_PK_PM_VRS_RATE_X_00_Y_11_QUAD__GFX103PLUS = 0x0000022b, - SC_PK_PM_VRS_RATE_X_01_Y_00_QUAD__GFX103PLUS = 0x0000022c, - SC_PK_PM_VRS_RATE_X_01_Y_01_QUAD__GFX103PLUS = 0x0000022d, - SC_PK_PM_VRS_RATE_X_01_Y_10_QUAD__GFX103PLUS = 0x0000022e, - SC_PK_PM_VRS_RATE_X_01_Y_11_QUAD__GFX103PLUS = 0x0000022f, - SC_PK_PM_VRS_RATE_X_10_Y_00_QUAD__GFX103PLUS = 0x00000230, - 
SC_PK_PM_VRS_RATE_X_10_Y_01_QUAD__GFX103PLUS = 0x00000231, - SC_PK_PM_VRS_RATE_X_10_Y_10_QUAD__GFX103PLUS = 0x00000232, - SC_PK_PM_VRS_RATE_X_10_Y_11_QUAD__GFX103PLUS = 0x00000233, - SC_PK_PM_VRS_RATE_X_11_Y_00_QUAD__GFX103PLUS = 0x00000234, - SC_PK_PM_VRS_RATE_X_11_Y_01_QUAD__GFX103PLUS = 0x00000235, - SC_PK_PM_VRS_RATE_X_11_Y_10_QUAD__GFX103PLUS = 0x00000236, - SC_PK_PM_VRS_RATE_X_11_Y_11_QUAD__GFX103PLUS = 0x00000237, - SC_PBB_RESERVED__GFX103PLUSEXCLUSIVE = 0x00000239, - SC_BM_BE0_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023a, - SC_BM_BE1_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023b, - SC_BM_BE2_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023c, - SC_BM_BE3_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023d, - SC_BM_MULTI_ACCUM_1_BE_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023e, - SC_BM_MULTI_ACCUM_2_BE_STALLED__GFX103PLUSEXCLUSIVE = 0x0000023f, - SC_BM_MULTI_ACCUM_3_BE_STALLED__GFX103PLUSEXCLUSIVE = 0x00000240, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE = 0x00000241, - SC_POPS_INTRA_WAVE_OVERLAPS__GFX10CORE = 0x000001a7, - SC_POPS_FORCE_EOV__GFX10CORE = 0x000001a8, - SC_PKR_QUAD_OVLP_NOT_FOUND_IN_WAVE_TABLE_AND_WAVES_SINCE_OVLP_SET_TO_MAX__GFX10CORE = 0x000001a9, - SC_PKR_QUAD_OVLP_NOT_FOUND_IN_WAVE_TABLE_AND_NO_CHANGE_TO_WAVES_SINCE_OVLP__GFX10CORE = 0x000001aa, - SC_PKR_QUAD_OVLP_FOUND_IN_WAVE_TABLE__GFX10CORE = 0x000001ab, - SC_FULL_FULL_QUAD__GFX10CORE = 0x000001ac, - SC_FULL_HALF_QUAD__GFX10CORE = 0x000001ad, - SC_FULL_QTR_QUAD__GFX10CORE = 0x000001ae, - SC_HALF_FULL_QUAD__GFX10CORE = 0x000001af, - SC_HALF_HALF_QUAD__GFX10CORE = 0x000001b0, - SC_HALF_QTR_QUAD__GFX10CORE = 0x000001b1, - SC_QTR_FULL_QUAD__GFX10CORE = 0x000001b2, - SC_QTR_HALF_QUAD__GFX10CORE = 0x000001b3, - SC_QTR_QTR_QUAD__GFX10CORE = 0x000001b4, - SC_DB0_TILE_INTERFACE_SEND_SOP_ONLY_EVENT__GFX10CORE = 0x000001de, - SC_DB0_TILE_INTERFACE_SEND_SOP__GFX10CORE = 0x000001df, - SC_DB1_TILE_INTERFACE_SEND_SOP_ONLY_EVENT__GFX10CORE = 0x000001e6, - SC_DB1_TILE_INTERFACE_SEND_SOP__GFX10CORE = 0x000001e7, - 
SC_PK_PM_POPS_FORCE_EOV_WAVE_BRK_1H__GFX10CORE = 0x00000223, - SC_BACKEND_PRIM_FIFO_FULL__GFX10PLUS = 0x000001eb, - SC_PBB_BATCH_BREAK_DUE_TO_TIMEOUT_COUNTER__GFX10PLUS = 0x000001ec, - SC_PBB_BATCH_BREAK_DUE_TO_NONBINNED_BATCH__GFX10PLUS = 0x000001ed, - SC_PBB_BATCH_BREAK_DUE_TO_DEBUG_DATA_PER_DRAW_DISPATCH__GFX10PLUS = 0x000001ee, - SC_PBB_BATCH_BREAK_DUE_TO_OVERRIDE_REGISTER_PERSISTENT__GFX10PLUS = 0x000001ef, - SC_PBB_BATCH_BREAK_DUE_TO_OVERRIDE_REGISTER_CONTEXT__GFX10PLUS = 0x000001f0, - SC_PBB_BATCH_BREAK_DUE_TO_OVERRIDE_REGISTER_FPOV__GFX10PLUS = 0x000001f1, - SC_PBB_BATCH_BREAK_DUE_TO_NEW_SC_MODE__GFX10PLUS = 0x000001f2, - SC_PBB_BATCH_BREAK_DUE_TO_BINNING_MODE_CHANGE__GFX10PLUS = 0x000001f3, - SC_PBB_BATCH_BREAK_DUE_TO_PIPELINE_EVENT_COUNT__GFX10PLUS = 0x000001f4, - SC_PBB_BATCH_BREAK_DUE_TO_PIPE_RESET__GFX10PLUS = 0x000001f5, - SC_PBB_BATCH_BREAK_DUE_TO_GFX_PIPE_CHANGE__GFX10PLUS = 0x000001f6, - SC_STALLED_BY_DB0_TILEFIFO__GFX10PLUS = 0x000001f7, - SC_DB0_QUAD_INTF_SEND__GFX10PLUS = 0x000001f8, - SC_DB0_QUAD_INTF_BUSY__GFX10PLUS = 0x000001f9, - SC_DB0_QUAD_INTF_STALLED_BY_DB__GFX10PLUS = 0x000001fa, - SC_DB0_QUAD_INTF_CREDIT_AT_MAX__GFX10PLUS = 0x000001fb, - SC_DB0_QUAD_INTF_IDLE__GFX10PLUS = 0x000001fc, - SC_PKR_WAVE_BREAK_OUTSIDE_REGION__GFX10PLUS = 0x00000203, - SC_PKR_WAVE_BREAK_FULL_TILE__GFX10PLUS = 0x00000204, - SC_FSR_WALKED__GFX10PLUS = 0x00000205, - SC_PBB_EMPTY_INPUT_CYCLE_WHEN_BATCH_OPEN__GFX10PLUS = 0x00000206, - SC_PBB_BATCH_BREAK_DUE_TO_NULL_PRIM_BREAK_BATCH_LIMIT__GFX10PLUS = 0x00000207, - SC_DB0_WE_STALLED_BY_RSLT_FIFO_FULL__GFX10PLUS = 0x00000208, - SC_DB0_WE_TILE_MASK_RETURN_FIFO_FULL_WITH_WE_RSLT_FIFO_STALL__GFX10PLUS = 0x00000209, - SC_DB0_TILE_MASK_FIFO_FULL__GFX10PLUS = 0x0000020a, - SC_PS_PM_PBB_TO_PSE_FIFO_WE_STALL_BY_PFF_PW_FULL__GFX10PLUS = 0x0000020e, - SC_PS_PM_PBB_TO_PSE_FIFO_WE_STALL_BY_ZFF_PW_FULL__GFX10PLUS = 0x0000020f, - SC_PS_PM_PBB_TO_PSE_FIFO_WE_STALL_BY_PBB_TO_PSE_FIFO_FULL__GFX10PLUS = 0x00000210, - 
SC_PS_PM_PFF_PW_FULL__GFX10PLUS = 0x00000211, - SC_PS_PM_ZFF_PW_FULL__GFX10PLUS = 0x00000212, - SC_PS_PM_PBB_TO_PSE_FIFO_FULL__GFX10PLUS = 0x00000213, - SC_PK_PM_4X2_SPLIT_WAVE_BRK_1H__GFX10PLUS = 0x00000217, - SC_PK_PM_PKR_FILL_4X2_WAVE_BRK_1H__GFX10PLUS = 0x00000218, - SC_PK_PM_SPLIT_OR_FILL_4X2_WAVE_BRK_1H__GFX10PLUS = 0x00000219, - SC_PK_PM_END_OF_VECTOR_WAVE_BRK_1H__GFX10PLUS = 0x0000021a, - SC_PK_PM_CTL_ONLY_CMD_WAVE_BRK_1H__GFX10PLUS = 0x0000021c, - SC_PK_PM_AVOID_DEALLOC_ADD_WAVE_BRK_1H__GFX10PLUS = 0x0000021d, - SC_PK_PM_FD_CONFLICT_WAVE_BRK_1H__GFX10PLUS = 0x0000021e, - SC_PK_PM_FORCE_PARTIAL_FOR_DEALLOC_WAVE_BRK_1H__GFX10PLUS = 0x0000021f, - SC_PK_PM_AE_CONFLICT_WAVE_BRK_1H__GFX10PLUS = 0x00000220, - SC_PK_PM_EOP_OR_LAD_WAVE_BRK_1H__GFX10PLUS = 0x00000221, - SC_PK_PM_FULL_TILE_WAVE_BRK_1H__GFX10PLUS = 0x00000222, - SC_PK_PM_MAX_DEALLOC_FORCE_EOV_WAVE_BRK_1H__GFX10PLUS = 0x00000224, - SC_PK_PM_WAVE_BREAK_OUTSIDE_REGION_WAVE_BRK_1H__GFX10PLUS = 0x00000225, - SC_PK_PM_MAX_CLK_CNT_FORCE_EOV_WAVE_BRK_1H__GFX10PLUS = 0x00000226, - SC_PK_PM_MAX_REZ_CNT_FORCE_EOV_WAVE_BRK_1H__GFX10PLUS = 0x00000227, - SC_PS_ENG_MULTICYCLE_BUBBLE__GFX10VRS = 0x0000013d, - SC_SPI_DEALLOC_0_0__GFX10VRS = 0x00000146, - SC_SPI_DEALLOC_0_1__GFX10VRS = 0x00000147, - SC_SPI_DEALLOC_0_2__GFX10VRS = 0x00000148, - SC_SPI_DEALLOC_1_0__GFX10VRS = 0x00000149, - SC_SPI_DEALLOC_1_1__GFX10VRS = 0x0000014a, - SC_SPI_DEALLOC_1_2__GFX10VRS = 0x0000014b, - SC_SPI_DEALLOC_2_0__GFX10VRS = 0x0000014c, - SC_SPI_DEALLOC_2_1__GFX10VRS = 0x0000014d, - SC_SPI_DEALLOC_2_2__GFX10VRS = 0x0000014e, - SC_SPI_DEALLOC_3_0__GFX10VRS = 0x0000014f, - SC_SPI_DEALLOC_3_1__GFX10VRS = 0x00000150, - SC_SPI_DEALLOC_3_2__GFX10VRS = 0x00000151, - SC_SPI_FPOV_0__GFX10VRS = 0x00000152, - SC_SPI_FPOV_1__GFX10VRS = 0x00000153, - SC_SPI_FPOV_2__GFX10VRS = 0x00000154, - SC_SPI_FPOV_3__GFX10VRS = 0x00000155, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SC_PERF_SEL_RESERVED_76__GFX11 = 0x0000004c, - 
SC_PERF_SEL_RESERVED_77__GFX11 = 0x0000004d, - SC_PERF_SEL_RESERVED_78__GFX11 = 0x0000004e, - SC_PERF_SEL_RESERVED_80__GFX11 = 0x00000050, - SC_PERF_SEL_RESERVED_81__GFX11 = 0x00000051, - SC_PERF_SEL_RESERVED_82__GFX11 = 0x00000052, - SC_PERF_SEL_RESERVED_84__GFX11 = 0x00000054, - SC_PERF_SEL_RESERVED_85__GFX11 = 0x00000055, - SC_PERF_SEL_RESERVED_86__GFX11 = 0x00000056, - SC_PERF_SEL_RESERVED_88__GFX11 = 0x00000058, - SC_PERF_SEL_RESERVED_89__GFX11 = 0x00000059, - SC_PERF_SEL_RESERVED_90__GFX11 = 0x0000005a, - SC_PERF_SEL_RESERVED_108__GFX11 = 0x0000006c, - SC_PERF_SEL_RESERVED_109__GFX11 = 0x0000006d, - SC_PERF_SEL_RESERVED_110__GFX11 = 0x0000006e, - SC_PERF_SEL_RESERVED_111__GFX11 = 0x0000006f, - SC_PERF_SEL_RESERVED_112__GFX11 = 0x00000070, - SC_PERF_SEL_RESERVED_113__GFX11 = 0x00000071, - SC_PERF_SEL_RESERVED_114__GFX11 = 0x00000072, - SC_PERF_SEL_RESERVED_115__GFX11 = 0x00000073, - SC_PERF_SEL_RESERVED_116__GFX11 = 0x00000074, - SC_PERF_SEL_RESERVED_117__GFX11 = 0x00000075, - SC_PERF_SEL_RESERVED_118__GFX11 = 0x00000076, - SC_PERF_SEL_RESERVED_119__GFX11 = 0x00000077, - SC_PERF_SEL_RESERVED_120__GFX11 = 0x00000078, - SC_PERF_SEL_RESERVED_121__GFX11 = 0x00000079, - SC_PERF_SEL_RESERVED_122__GFX11 = 0x0000007a, - SC_PERF_SEL_RESERVED_123__GFX11 = 0x0000007b, - SC_PERF_SEL_RESERVED_124__GFX11 = 0x0000007c, - SC_PERF_SEL_RESERVED_125__GFX11 = 0x0000007d, - SC_PERF_SEL_RESERVED_126__GFX11 = 0x0000007e, - SC_PERF_SEL_RESERVED_127__GFX11 = 0x0000007f, - SC_PERF_SEL_RESERVED_128__GFX11 = 0x00000080, - SC_PERF_SEL_RESERVED_129__GFX11 = 0x00000081, - SC_PERF_SEL_RESERVED_130__GFX11 = 0x00000082, - SC_PERF_SEL_RESERVED_131__GFX11 = 0x00000083, - SC_PERF_SEL_RESERVED_132__GFX11 = 0x00000084, - SC_PERF_SEL_RESERVED_133__GFX11 = 0x00000085, - SC_PERF_SEL_RESERVED_134__GFX11 = 0x00000086, - SC_PERF_SEL_RESERVED_135__GFX11 = 0x00000087, - SC_PERF_SEL_RESERVED_136__GFX11 = 0x00000088, - SC_PERF_SEL_RESERVED_137__GFX11 = 0x00000089, - SC_PERF_SEL_RESERVED_138__GFX11 = 
0x0000008a, - SC_PERF_SEL_RESERVED_139__GFX11 = 0x0000008b, - SC_PERF_SEL_RESERVED_140__GFX11 = 0x0000008c, - SC_PERF_SEL_RESERVED_141__GFX11 = 0x0000008d, - SC_PERF_SEL_RESERVED_142__GFX11 = 0x0000008e, - SC_PERF_SEL_RESERVED_143__GFX11 = 0x0000008f, - SC_PERF_SEL_RESERVED_144__GFX11 = 0x00000090, - SC_PERF_SEL_RESERVED_145__GFX11 = 0x00000091, - SC_PERF_SEL_RESERVED_146__GFX11 = 0x00000092, - SC_PERF_SEL_RESERVED_147__GFX11 = 0x00000093, - SC_PERF_SEL_RESERVED_148__GFX11 = 0x00000094, - SC_PERF_SEL_RESERVED_149__GFX11 = 0x00000095, - SC_PERF_SEL_RESERVED_150__GFX11 = 0x00000096, - SC_PERF_SEL_RESERVED_151__GFX11 = 0x00000097, - SC_PERF_SEL_RESERVED_152__GFX11 = 0x00000098, - SC_PERF_SEL_RESERVED_153__GFX11 = 0x00000099, - SC_PERF_SEL_RESERVED_154__GFX11 = 0x0000009a, - SC_PERF_SEL_RESERVED_155__GFX11 = 0x0000009b, - SC_PERF_SEL_RESERVED_156__GFX11 = 0x0000009c, - SC_PERF_SEL_RESERVED_157__GFX11 = 0x0000009d, - SC_PERF_SEL_RESERVED_158__GFX11 = 0x0000009e, - SC_PERF_SEL_RESERVED_160__GFX11 = 0x000000a0, - SC_PERF_SEL_RESERVED_161__GFX11 = 0x000000a1, - SC_PERF_SEL_RESERVED_162__GFX11 = 0x000000a2, - SC_PERF_SEL_RESERVED_164__GFX11 = 0x000000a4, - SC_PERF_SEL_RESERVED_165__GFX11 = 0x000000a5, - SC_PERF_SEL_RESERVED_166__GFX11 = 0x000000a6, - SC_PERF_SEL_RESERVED_184__GFX11 = 0x000000b8, - SC_PERF_SEL_RESERVED_185__GFX11 = 0x000000b9, - SC_PERF_SEL_RESERVED_186__GFX11 = 0x000000ba, - SC_PERF_SEL_RESERVED_187__GFX11 = 0x000000bb, - SC_PERF_SEL_RESERVED_188__GFX11 = 0x000000bc, - SC_PERF_SEL_RESERVED_189__GFX11 = 0x000000bd, - SC_PERF_SEL_RESERVED_190__GFX11 = 0x000000be, - SC_PERF_SEL_RESERVED_191__GFX11 = 0x000000bf, - SC_PERF_SEL_RESERVED_192__GFX11 = 0x000000c0, - SC_PERF_SEL_RESERVED_193__GFX11 = 0x000000c1, - SC_PERF_SEL_RESERVED_194__GFX11 = 0x000000c2, - SC_PERF_SEL_RESERVED_195__GFX11 = 0x000000c3, - SC_PERF_SEL_RESERVED_196__GFX11 = 0x000000c4, - SC_PERF_SEL_RESERVED_197__GFX11 = 0x000000c5, - SC_PERF_SEL_RESERVED_198__GFX11 = 0x000000c6, - 
SC_PERF_SEL_RESERVED_199__GFX11 = 0x000000c7, - SC_PERF_SEL_RESERVED_200__GFX11 = 0x000000c8, - SC_PERF_SEL_RESERVED_201__GFX11 = 0x000000c9, - SC_PERF_SEL_RESERVED_202__GFX11 = 0x000000ca, - SC_PERF_SEL_RESERVED_203__GFX11 = 0x000000cb, - SC_PERF_SEL_RESERVED_204__GFX11 = 0x000000cc, - SC_PERF_SEL_RESERVED_205__GFX11 = 0x000000cd, - SC_PERF_SEL_RESERVED_206__GFX11 = 0x000000ce, - SC_PERF_SEL_RESERVED_207__GFX11 = 0x000000cf, - SC_PERF_SEL_RESERVED_208__GFX11 = 0x000000d0, - SC_PERF_SEL_RESERVED_209__GFX11 = 0x000000d1, - SC_PERF_SEL_RESERVED_210__GFX11 = 0x000000d2, - SC_PERF_SEL_RESERVED_211__GFX11 = 0x000000d3, - SC_PERF_SEL_RESERVED_212__GFX11 = 0x000000d4, - SC_PERF_SEL_RESERVED_213__GFX11 = 0x000000d5, - SC_PERF_SEL_RESERVED_214__GFX11 = 0x000000d6, - SC_PERF_SEL_RESERVED_215__GFX11 = 0x000000d7, - SC_PERF_SEL_RESERVED_216__GFX11 = 0x000000d8, - SC_PERF_SEL_RESERVED_217__GFX11 = 0x000000d9, - SC_PERF_SEL_RESERVED_218__GFX11 = 0x000000da, - SC_PERF_SEL_RESERVED_219__GFX11 = 0x000000db, - SC_PERF_SEL_RESERVED_220__GFX11 = 0x000000dc, - SC_PERF_SEL_RESERVED_221__GFX11 = 0x000000dd, - SC_PERF_SEL_RESERVED_222__GFX11 = 0x000000de, - SC_PERF_SEL_RESERVED_223__GFX11 = 0x000000df, - SC_PERF_SEL_RESERVED_224__GFX11 = 0x000000e0, - SC_PERF_SEL_RESERVED_225__GFX11 = 0x000000e1, - SC_PERF_SEL_RESERVED_226__GFX11 = 0x000000e2, - SC_PERF_SEL_RESERVED_227__GFX11 = 0x000000e3, - SC_PERF_SEL_RESERVED_228__GFX11 = 0x000000e4, - SC_PERF_SEL_RESERVED_229__GFX11 = 0x000000e5, - SC_PERF_SEL_RESERVED_230__GFX11 = 0x000000e6, - SC_PERF_SEL_RESERVED_231__GFX11 = 0x000000e7, - SC_PERF_SEL_RESERVED_232__GFX11 = 0x000000e8, - SC_PERF_SEL_RESERVED_233__GFX11 = 0x000000e9, - SC_PERF_SEL_RESERVED_234__GFX11 = 0x000000ea, - SC_PERF_SEL_RESERVED_236__GFX11 = 0x000000ec, - SC_PERF_SEL_RESERVED_237__GFX11 = 0x000000ed, - SC_PERF_SEL_RESERVED_238__GFX11 = 0x000000ee, - SC_PERF_SEL_RESERVED_240__GFX11 = 0x000000f0, - SC_PERF_SEL_RESERVED_241__GFX11 = 0x000000f1, - SC_PERF_SEL_RESERVED_242__GFX11 
= 0x000000f2, - SC_PERF_SEL_RESERVED_247__GFX11 = 0x000000f7, - SC_PERF_SEL_RESERVED_248__GFX11 = 0x000000f8, - SC_PERF_SEL_RESERVED_249__GFX11 = 0x000000f9, - SC_PERF_SEL_RESERVED_250__GFX11 = 0x000000fa, - SC_PERF_SEL_RESERVED_251__GFX11 = 0x000000fb, - SC_PERF_SEL_RESERVED_252__GFX11 = 0x000000fc, - SC_PERF_SEL_RESERVED_253__GFX11 = 0x000000fd, - SC_PERF_SEL_RESERVED_254__GFX11 = 0x000000fe, - SC_PERF_SEL_RESERVED_255__GFX11 = 0x000000ff, - SC_PERF_SEL_RESERVED_256__GFX11 = 0x00000100, - SC_PERF_SEL_RESERVED_257__GFX11 = 0x00000101, - SC_PERF_SEL_RESERVED_258__GFX11 = 0x00000102, - SC_PERF_SEL_RESERVED_279__GFX11 = 0x00000117, - SC_PERF_SEL_RESERVED_280__GFX11 = 0x00000118, - SC_PERF_SEL_RESERVED_288__GFX11 = 0x00000120, - SC_PERF_SEL_RESERVED_289__GFX11 = 0x00000121, - SC_PERF_SEL_RESERVED_290__GFX11 = 0x00000122, - SC_PERF_SEL_RESERVED_291__GFX11 = 0x00000123, - SC_PA_SC_DEALLOC_2_0_WE__GFX11 = 0x00000124, - SC_PERF_SEL_RESERVED_293__GFX11 = 0x00000125, - SC_PERF_SEL_RESERVED_294__GFX11 = 0x00000126, - SC_PERF_SEL_RESERVED_295__GFX11 = 0x00000127, - SC_PERF_SEL_RESERVED_296__GFX11 = 0x00000128, - SC_PERF_SEL_RESERVED_297__GFX11 = 0x00000129, - SC_PERF_SEL_RESERVED_298__GFX11 = 0x0000012a, - SC_PERF_SEL_RESERVED_299__GFX11 = 0x0000012b, - SC_PERF_SEL_RESERVED_301__GFX11 = 0x0000012d, - SC_PERF_SEL_RESERVED_303__GFX11 = 0x0000012f, - SC_PERF_SEL_RESERVED_304__GFX11 = 0x00000130, - SC_PERF_SEL_RESERVED_305__GFX11 = 0x00000131, - SC_PERF_SEL_RESERVED_306__GFX11 = 0x00000132, - SC_PERF_SEL_RESERVED_307__GFX11 = 0x00000133, - SC_PERF_SEL_RESERVED_308__GFX11 = 0x00000134, - SC_PERF_SEL_RESERVED_309__GFX11 = 0x00000135, - SC_PERF_SEL_RESERVED_310__GFX11 = 0x00000136, - SC_PERF_SEL_RESERVED_311__GFX11 = 0x00000137, - SC_PERF_SEL_RESERVED_312__GFX11 = 0x00000138, - SC_PERF_SEL_RESERVED_313__GFX11 = 0x00000139, - SC_PERF_SEL_RESERVED_314__GFX11 = 0x0000013a, - SC_PERF_SEL_RESERVED_315__GFX11 = 0x0000013b, - SC_PERF_SEL_RESERVED_316__GFX11 = 0x0000013c, - 
SC_PERF_SEL_RESERVED_317__GFX11 = 0x0000013d, - SC_PA_SC_FPOV_WE__GFX11 = 0x0000013e, - SC_PERF_SEL_RESERVED_319__GFX11 = 0x0000013f, - SC_PERF_SEL_RESERVED_320__GFX11 = 0x00000140, - SC_PERF_SEL_RESERVED_321__GFX11 = 0x00000141, - SC_PERF_SEL_RESERVED_322__GFX11 = 0x00000142, - SC_PERF_SEL_RESERVED_323__GFX11 = 0x00000143, - SC_PERF_SEL_RESERVED_324__GFX11 = 0x00000144, - SC_PERF_SEL_RESERVED_325__GFX11 = 0x00000145, - SC_SPI_DEALLOC_4_0__GFX11 = 0x00000146, - SC_SPI_DEALLOC_7_5__GFX11 = 0x00000147, - SC_PERF_SEL_RESERVED_328__GFX11 = 0x00000148, - SC_PERF_SEL_RESERVED_329__GFX11 = 0x00000149, - SC_PERF_SEL_RESERVED_330__GFX11 = 0x0000014a, - SC_PERF_SEL_RESERVED_331__GFX11 = 0x0000014b, - SC_PERF_SEL_RESERVED_332__GFX11 = 0x0000014c, - SC_PERF_SEL_RESERVED_333__GFX11 = 0x0000014d, - SC_PERF_SEL_RESERVED_334__GFX11 = 0x0000014e, - SC_PERF_SEL_RESERVED_335__GFX11 = 0x0000014f, - SC_PERF_SEL_RESERVED_336__GFX11 = 0x00000150, - SC_PERF_SEL_RESERVED_337__GFX11 = 0x00000151, - SC_SPI_FPOV_4_0__GFX11 = 0x00000152, - SC_SPI_FPOV_7_5__GFX11 = 0x00000153, - SC_PERF_SEL_RESERVED_340__GFX11 = 0x00000154, - SC_PERF_SEL_RESERVED_341__GFX11 = 0x00000155, - SC_PERF_SEL_RESERVED_347__GFX11 = 0x0000015b, - SC_PERF_SEL_RESERVED_348__GFX11 = 0x0000015c, - SC_PERF_SEL_RESERVED_351__GFX11 = 0x0000015f, - SC_PA0_SC_DEALLOC_2_0_RD__GFX11 = 0x00000161, - SC_PERF_SEL_RESERVED_354__GFX11 = 0x00000162, - SC_PERF_SEL_RESERVED_355__GFX11 = 0x00000163, - SC_PERF_SEL_RESERVED_356__GFX11 = 0x00000164, - SC_PERF_SEL_RESERVED_357__GFX11 = 0x00000165, - SC_PERF_SEL_RESERVED_358__GFX11 = 0x00000166, - SC_PERF_SEL_RESERVED_359__GFX11 = 0x00000167, - SC_PERF_SEL_RESERVED_360__GFX11 = 0x00000168, - SC_PERF_SEL_RESERVED_361__GFX11 = 0x00000169, - SC_PERF_SEL_RESERVED_362__GFX11 = 0x0000016a, - SC_PERF_SEL_RESERVED_363__GFX11 = 0x0000016b, - SC_PERF_SEL_RESERVED_364__GFX11 = 0x0000016c, - SC_PERF_SEL_RESERVED_365__GFX11 = 0x0000016d, - SC_PERF_SEL_RESERVED_366__GFX11 = 0x0000016e, - 
SC_PERF_SEL_RESERVED_367__GFX11 = 0x0000016f, - SC_PERF_SEL_RESERVED_368__GFX11 = 0x00000170, - SC_PERF_SEL_RESERVED_369__GFX11 = 0x00000171, - SC_PERF_SEL_RESERVED_370__GFX11 = 0x00000172, - SC_PERF_SEL_RESERVED_371__GFX11 = 0x00000173, - SC_PERF_SEL_RESERVED_372__GFX11 = 0x00000174, - SC_PERF_SEL_RESERVED_375__GFX11 = 0x00000177, - SC_PERF_SEL_RESERVED_376__GFX11 = 0x00000178, - SC_PERF_SEL_RESERVED_377__GFX11 = 0x00000179, - SC_PERF_SEL_RESERVED_378__GFX11 = 0x0000017a, - SC_PERF_SEL_RESERVED_379__GFX11 = 0x0000017b, - SC_PERF_SEL_RESERVED_380__GFX11 = 0x0000017c, - SC_PERF_SEL_RESERVED_381__GFX11 = 0x0000017d, - SC_PERF_SEL_RESERVED_382__GFX11 = 0x0000017e, - SC_PERF_SEL_RESERVED_383__GFX11 = 0x0000017f, - SC_PERF_SEL_RESERVED_384__GFX11 = 0x00000180, - SC_PERF_SEL_RESERVED_385__GFX11 = 0x00000181, - SC_PERF_SEL_RESERVED_423__GFX11 = 0x000001a7, - SC_PERF_SEL_RESERVED_424__GFX11 = 0x000001a8, - SC_PERF_SEL_RESERVED_425__GFX11 = 0x000001a9, - SC_PERF_SEL_RESERVED_426__GFX11 = 0x000001aa, - SC_PERF_SEL_RESERVED_427__GFX11 = 0x000001ab, - SC_PERF_SEL_RESERVED_428__GFX11 = 0x000001ac, - SC_PERF_SEL_RESERVED_429__GFX11 = 0x000001ad, - SC_PERF_SEL_RESERVED_430__GFX11 = 0x000001ae, - SC_PERF_SEL_RESERVED_431__GFX11 = 0x000001af, - SC_PERF_SEL_RESERVED_432__GFX11 = 0x000001b0, - SC_PERF_SEL_RESERVED_433__GFX11 = 0x000001b1, - SC_PERF_SEL_RESERVED_434__GFX11 = 0x000001b2, - SC_PERF_SEL_RESERVED_435__GFX11 = 0x000001b3, - SC_PERF_SEL_RESERVED_436__GFX11 = 0x000001b4, - SC_PERF_SEL_RESERVED_478__GFX11 = 0x000001de, - SC_PERF_SEL_RESERVED_479__GFX11 = 0x000001df, - SC_PERF_SEL_RESERVED_483__GFX11 = 0x000001e3, - SC_PERF_SEL_RESERVED_484__GFX11 = 0x000001e4, - SC_PERF_SEL_RESERVED_485__GFX11 = 0x000001e5, - SC_PERF_SEL_RESERVED_486__GFX11 = 0x000001e6, - SC_PERF_SEL_RESERVED_487__GFX11 = 0x000001e7, - SC_PERF_SEL_RESERVED_488__GFX11 = 0x000001e8, - SC_PERF_SEL_RESERVED_489__GFX11 = 0x000001e9, - SC_PERF_SEL_RESERVED_490__GFX11 = 0x000001ea, - SC_PERF_SEL_RESERVED_509__GFX11 
= 0x000001fd, - SC_PERF_SEL_RESERVED_510__GFX11 = 0x000001fe, - SC_PERF_SEL_RESERVED_511__GFX11 = 0x000001ff, - SC_PERF_SEL_RESERVED_512__GFX11 = 0x00000200, - SC_PERF_SEL_RESERVED_513__GFX11 = 0x00000201, - SC_PERF_SEL_RESERVED_514__GFX11 = 0x00000202, - SC_PERF_SEL_RESERVED_523__GFX11 = 0x0000020b, - SC_PERF_SEL_RESERVED_524__GFX11 = 0x0000020c, - SC_PERF_SEL_RESERVED_525__GFX11 = 0x0000020d, - SC_PERF_SEL_RESERVED_532__GFX11 = 0x00000214, - SC_PERF_SEL_RESERVED_533__GFX11 = 0x00000215, - SC_PERF_SEL_RESERVED_534__GFX11 = 0x00000216, - SC_PERF_SEL_RESERVED_539__GFX11 = 0x0000021b, - SC_PK_PM_OREO_CONFLICT_QUAD_FORCE_EOV_WAVE_BRK_1H__GFX11 = 0x00000223, - SC_PERF_SEL_RESERVED_568__GFX11 = 0x00000238, - SC_PBB_READ_PH0__GFX11 = 0x00000242, - SC_PBB_READ_DEALLOC_4_0__GFX11 = 0x00000243, - SC_PBB_READ_DEALLOC_7_5__GFX11 = 0x00000244, - SC_PBB_READ_FPOG_4_0__GFX11 = 0x00000245, - SC_PBB_READ_FPOG_7_5__GFX11 = 0x00000246, - SC_VRC_SECTOR_HIT__GFX11 = 0x00000247, - SC_VRC_TAG_MISS__GFX11 = 0x00000248, - SC_VRC_SECTOR_MISS__GFX11 = 0x00000249, - SC_VRC_LRU_EVICT_STALL__GFX11 = 0x0000024a, - SC_VRC_LRU_EVICT_SCHEDULED_EVICT_STALL__GFX11 = 0x0000024b, - SC_VRC_LRU_EVICT_PENDING_EVICT_STALL__GFX11 = 0x0000024c, - SC_VRC_REEVICTION_STALL__GFX11 = 0x0000024d, - SC_VRC_EVICT_NONZERO_INFLIGHT_STALL__GFX11 = 0x0000024e, - SC_VRC_REPLACE_SCHEDULED_EVICT_STALL__GFX11 = 0x0000024f, - SC_VRC_REPLACE_PENDING_EVICT_STALL__GFX11 = 0x00000250, - SC_VRC_REPLACE_FLUSH_IN_PROGRESS_STALL__GFX11 = 0x00000251, - SC_VRC_INFLIGHT_COUNTER_MAXIMUM_STALL__GFX11 = 0x00000252, - SC_VRC_READ_OUTPUT_STALL__GFX11 = 0x00000253, - SC_VRC_WRITE_OUTPUT_STALL__GFX11 = 0x00000254, - SC_VRC_ACK_OUTPUT_STALL__GFX11 = 0x00000255, - SC_VRC_FLUSH_EVICT_STALL__GFX11 = 0x00000256, - SC_VRC_FLUSH_REFLUSH_STALL__GFX11 = 0x00000257, - SC_VRC_FLUSH_FIP_HIT_STALL__GFX11 = 0x00000258, - SC_VRC_FLUSH_WRREQ_DRAIN_STALL__GFX11 = 0x00000259, - SC_VRC_FLUSH_DONE_STALL__GFX11 = 0x0000025a, - SC_VRC_FLUSH_STALL__GFX11 = 
0x0000025b, - SC_VRC_STALL__GFX11 = 0x0000025c, - SC_VRC_SECTORS_FLUSHED__GFX11 = 0x0000025d, - SC_VRC_DIRTY_SECTORS_FLUSHED__GFX11 = 0x0000025e, - SC_VRC_TAGS_FLUSHED__GFX11 = 0x0000025f, - SC_VRC_VRF_REQ__GFX11 = 0x00000260, - SC_VRC_VRF_EVENT__GFX11 = 0x00000261, - SC_VRC_VRF_STALLED__GFX11 = 0x00000262, - SC_VRC_PROBE_ACK_TILES__GFX11 = 0x00000263, - SC_VRC_GL1H_RD_REQ__GFX11 = 0x00000264, - SC_VRC_GL1H_WR_REQ__GFX11 = 0x00000265, - SC_VRC_GL1H_SRC_XFR__GFX11 = 0x00000266, - SC_VRC_GL1H_RD_RET_0__GFX11 = 0x00000267, - SC_VRC_GL1H_RD_RET_1__GFX11 = 0x00000268, - SC_VRC_GL1H_WR_ACK_0__GFX11 = 0x00000269, - SC_VRC_GL1H_WR_ACK_1__GFX11 = 0x0000026a, - SC_VRC_GL1H_RD_XNACK_0__GFX11 = 0x0000026b, - SC_VRC_GL1H_RD_XNACK_1__GFX11 = 0x0000026c, - SC_VRC_GL1H_WR_XNACK_0__GFX11 = 0x0000026d, - SC_VRC_GL1H_WR_XNACK_1__GFX11 = 0x0000026e, - SC_VRC_GL1H_REQ_STALLED__GFX11 = 0x0000026f, - SC_VRC_GL1H_SRC_STALLED__GFX11 = 0x00000270, - SC_VRC_RATEMEM_WE_CNT__GFX11 = 0x00000271, - SC_VRC_RATEMEM_RE_CNT__GFX11 = 0x00000272, - SC_VRC_HINTMEM_WE_CNT__GFX11 = 0x00000273, - SC_VRC_HINTMEM_RE_CNT__GFX11 = 0x00000274, - SC_VRC_BUSY__GFX11 = 0x00000275, - SC_GL1H_BUSY__GFX11 = 0x00000276, - SC_BE_VRS_RD_REQ__GFX11 = 0x00000277, - SC_BE_VRS_RD_REQ_STALLED__GFX11 = 0x00000278, - SC_BE_VRS_RD_REQ_HIT__GFX11 = 0x00000279, - SC_BE_VRS_RD_RET__GFX11 = 0x0000027a, - SC_BE_VRS_RD_RET_STALLED__GFX11 = 0x0000027b, - SC_BE_VRS_FB_RET__GFX11 = 0x0000027c, - SC_BE_VRS_FB_RET_STALLED__GFX11 = 0x0000027d, - SC_BE_VRS_FB_RET_HIT__GFX11 = 0x0000027e, - SC_VRS_BE_BUSY__GFX11 = 0x0000027f, - SC_PWS_CS_EVENTS_PWS_ENABLE__GFX11 = 0x00000280, - SC_PWS_PS_EVENTS_PWS_ENABLE__GFX11 = 0x00000281, - SC_PWS_TS_EVENTS_PWS_ENABLE__GFX11 = 0x00000282, - SC_PWS_STALLED__GFX11 = 0x00000283, - SC_PWS_P0_CS_SYNC_COMPLETE__GFX11 = 0x00000284, - SC_PWS_P0_PS_SYNC_COMPLETE__GFX11 = 0x00000285, - SC_PWS_P0_TS_SYNC_COMPLETE__GFX11 = 0x00000286, - SC_PWS_P1_CS_SYNC_COMPLETE__GFX11 = 0x00000287, - 
SC_PWS_P1_PS_SYNC_COMPLETE__GFX11 = 0x00000288, - SC_PWS_P1_TS_SYNC_COMPLETE__GFX11 = 0x00000289, - SC_PKR_PC_NO_CREDITS__GFX11 = 0x0000028a, - SC_PKR_PC_STALLED__GFX11 = 0x0000028b, - SC_PKR_PC_SEND__GFX11 = 0x0000028c, - SC_PKR_PC_SEND_PRIM_VALID_1__GFX11 = 0x0000028d, - SC_PKR_PC_SEND_PRIM_VALID_0__GFX11 = 0x0000028e, - SC_PKR_PC_SEND_TRUE_PRIM__GFX11 = 0x0000028f, - SC_PKR_PC_SEND_EOV__GFX11 = 0x00000290, - SC_PKR_PC_SEND_EVENT__GFX11 = 0x00000291, - SC_PKR_DB_WAVE_STALL__GFX11 = 0x00000292, - SC_PKR_PSINVOC_SEDC_FIFO_FULL__GFX11 = 0x00000293, - SC_PKR_OREO_STALLED_BY_NO_VALID_WAIVE_ID__GFX11 = 0x00000294, - SC_PKR_SPI_QUAD_COUNT__GFX11 = 0x00000295, - SC_PKR_DB_OREO_WAVE_QUAD_COUNT__GFX11 = 0x00000296, - SC_PKR_BCI_QUAD_NEW_PRIM__GFX11 = 0x00000297, - SC_SPI_WAVE_STALLED_BY_SPI__GFX11 = 0x00000298, -#endif -#if CHIP_HDR_NAVI32 - SC_VRC_REPROBE_XFR__NV32 = 0x00000299, - SC_VRC_REPROBE_FULL__NV32 = 0x0000029a, -#endif -#if CHIP_HDR_NAVI33 - SC_VRC_REPROBE_XFR__NV33 = 0x00000299, - SC_VRC_REPROBE_FULL__NV33 = 0x0000029a, -#endif - SC_BACKEND_PRIM_FIFO_FULL__VG12_VG20_RN = 0x000001eb, -} SC_PERFCNT_SEL; - -constexpr unsigned int MaxScPerfcntSelVg10_Rv1x_Rv2x = SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND__GFX09_10; -constexpr unsigned int MaxScPerfcntSelVg12_Vg20_Rn = SC_BACKEND_PRIM_FIFO_FULL__VG12_VG20_RN; -constexpr unsigned int MaxScPerfcntSelGfx101 = SC_PK_PM_MAX_REZ_CNT_FORCE_EOV_WAVE_BRK_1H__GFX10PLUS; -constexpr unsigned int MaxScPerfcntSelGfx103 = SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE; -#if CHIP_HDR_NAVI31 -constexpr unsigned int MaxScPerfcntSelNv31 = SC_SPI_WAVE_STALLED_BY_SPI__GFX11; -#endif -#if CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxScPerfcntSelApu11 = SC_VRC_REPROBE_FULL__APU11; -#endif -#if CHIP_HDR_NAVI33 -constexpr unsigned int MaxScPerfcntSelNv33 = SC_VRC_REPROBE_FULL__NV33; -#endif -#if CHIP_HDR_NAVI32 -constexpr unsigned int MaxScPerfcntSelNv32 = SC_VRC_REPROBE_FULL__NV32; -#endif - -#if CHIP_HDR_NAVI31|| 
CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum SDMA_PERFMON_SEL { - SDMA_PERFMON_SEL_CYCLE = 0x00000000, - SDMA_PERFMON_SEL_IDLE = 0x00000001, - SDMA_PERFMON_SEL_REG_IDLE = 0x00000002, - SDMA_PERFMON_SEL_RB_EMPTY = 0x00000003, - SDMA_PERFMON_SEL_RB_FULL = 0x00000004, - SDMA_PERFMON_SEL_RB_WPTR_WRAP = 0x00000005, - SDMA_PERFMON_SEL_RB_RPTR_WRAP = 0x00000006, - SDMA_PERFMON_SEL_RB_WPTR_POLL_READ = 0x00000007, - SDMA_PERFMON_SEL_RB_RPTR_WB = 0x00000008, - SDMA_PERFMON_SEL_RB_CMD_IDLE = 0x00000009, - SDMA_PERFMON_SEL_RB_CMD_FULL = 0x0000000a, - SDMA_PERFMON_SEL_IB_CMD_IDLE = 0x0000000b, - SDMA_PERFMON_SEL_IB_CMD_FULL = 0x0000000c, - SDMA_PERFMON_SEL_EX_IDLE = 0x0000000d, - SDMA_PERFMON_SEL_SRBM_REG_SEND = 0x0000000e, - SDMA_PERFMON_SEL_EX_IDLE_POLL_TIMER_EXPIRE = 0x0000000f, - SDMA_PERFMON_SEL_WR_BA_RTR = 0x00000010, - SDMA_PERFMON_SEL_MC_WR_IDLE = 0x00000011, - SDMA_PERFMON_SEL_MC_WR_COUNT = 0x00000012, - SDMA_PERFMON_SEL_RD_BA_RTR = 0x00000013, - SDMA_PERFMON_SEL_MC_RD_IDLE = 0x00000014, - SDMA_PERFMON_SEL_MC_RD_COUNT = 0x00000015, - SDMA_PERFMON_SEL_MC_RD_RET_STALL = 0x00000016, - SDMA_PERFMON_SEL_MC_RD_NO_POLL_IDLE = 0x00000017, - SDMA_PERFMON_SEL_DRM_IDLE = 0x00000018, - SDMA_PERFMON_SEL_DRM_REQ_STALL = 0x00000019, - SDMA_PERFMON_SEL_SEM_IDLE = 0x0000001a, - SDMA_PERFMON_SEL_SEM_REQ_STALL = 0x0000001b, - SDMA_PERFMON_SEL_SEM_REQ_COUNT = 0x0000001c, - SDMA_PERFMON_SEL_SEM_RESP_INCOMPLETE = 0x0000001d, - SDMA_PERFMON_SEL_SEM_RESP_FAIL = 0x0000001e, - SDMA_PERFMON_SEL_SEM_RESP_PASS = 0x0000001f, - SDMA_PERFMON_SEL_INT_IDLE = 0x00000020, - SDMA_PERFMON_SEL_INT_REQ_STALL = 0x00000021, - SDMA_PERFMON_SEL_INT_REQ_COUNT = 0x00000022, - SDMA_PERFMON_SEL_INT_RESP_ACCEPTED = 0x00000023, - SDMA_PERFMON_SEL_INT_RESP_RETRY = 0x00000024, - SDMA_PERFMON_SEL_NUM_PACKET = 0x00000025, - SDMA_PERFMON_SEL_DRM1_REQ_STALL = 0x00000026, - SDMA_PERFMON_SEL_CE_WREQ_IDLE = 0x00000027, - SDMA_PERFMON_SEL_CE_WR_IDLE = 0x00000028, - SDMA_PERFMON_SEL_CE_SPLIT_IDLE = 
0x00000029, - SDMA_PERFMON_SEL_CE_RREQ_IDLE = 0x0000002a, - SDMA_PERFMON_SEL_CE_OUT_IDLE = 0x0000002b, - SDMA_PERFMON_SEL_CE_IN_IDLE = 0x0000002c, - SDMA_PERFMON_SEL_CE_DST_IDLE = 0x0000002d, - SDMA_PERFMON_SEL_CE_DRM_IDLE = 0x0000002e, - SDMA_PERFMON_SEL_CE_DRM1_IDLE = 0x0000002f, - SDMA_PERFMON_SEL_CE_AFIFO_FULL = 0x00000030, - SDMA_PERFMON_SEL_DUMMY_0 = 0x00000031, - SDMA_PERFMON_SEL_DUMMY_1 = 0x00000032, - SDMA_PERFMON_SEL_CE_INFO_FULL = 0x00000033, - SDMA_PERFMON_SEL_CE_INFO1_FULL = 0x00000034, - SDMA_PERFMON_SEL_CE_RD_STALL = 0x00000035, - SDMA_PERFMON_SEL_CE_WR_STALL = 0x00000036, - SDMA_PERFMON_SEL_QUEUE0_SELECT = 0x00000037, - SDMA_PERFMON_SEL_QUEUE1_SELECT = 0x00000038, - SDMA_PERFMON_SEL_QUEUE2_SELECT = 0x00000039, - SDMA_PERFMON_SEL_QUEUE3_SELECT = 0x0000003a, - SDMA_PERFMON_SEL_CTX_CHANGE = 0x0000003b, - SDMA_PERFMON_SEL_CTX_CHANGE_EXPIRED = 0x0000003c, - SDMA_PERFMON_SEL_CTX_CHANGE_EXCEPTION = 0x0000003d, - SDMA_PERFMON_SEL_DOORBELL = 0x0000003e, - SDMA_PERFMON_SEL_F32_L1_WR_VLD = 0x0000003f, - SDMA_PERFMON_SEL_CE_L1_WR_VLD = 0x00000040, - SDMA_PERFMON_SEL_CPF_SDMA_INVREQ = 0x00000041, - SDMA_PERFMON_SEL_SDMA_CPF_INVACK = 0x00000042, - SDMA_PERFMON_SEL_UTCL2_SDMA_INVREQ = 0x00000043, - SDMA_PERFMON_SEL_SDMA_UTCL2_INVACK = 0x00000044, - SDMA_PERFMON_SEL_UTCL2_SDMA_INVREQ_ALL = 0x00000045, - SDMA_PERFMON_SEL_SDMA_UTCL2_INVACK_ALL = 0x00000046, - SDMA_PERFMON_SEL_UTCL2_RET_XNACK = 0x00000047, - SDMA_PERFMON_SEL_UTCL2_RET_ACK = 0x00000048, - SDMA_PERFMON_SEL_UTCL2_FREE = 0x00000049, - SDMA_PERFMON_SEL_SDMA_UTCL2_SEND = 0x0000004a, - SDMA_PERFMON_SEL_DMA_L1_WR_SEND = 0x0000004b, - SDMA_PERFMON_SEL_DMA_L1_RD_SEND = 0x0000004c, - SDMA_PERFMON_SEL_DMA_MC_WR_SEND = 0x0000004d, - SDMA_PERFMON_SEL_DMA_MC_RD_SEND = 0x0000004e, - SDMA_PERFMON_SEL_GPUVM_INV_HIGH = 0x0000004f, - SDMA_PERFMON_SEL_GPUVM_INV_LOW = 0x00000050, - SDMA_PERFMON_SEL_L1_WRL2_IDLE = 0x00000051, - SDMA_PERFMON_SEL_L1_RDL2_IDLE = 0x00000052, - SDMA_PERFMON_SEL_L1_WRMC_IDLE = 0x00000053, - 
SDMA_PERFMON_SEL_L1_RDMC_IDLE = 0x00000054, - SDMA_PERFMON_SEL_L1_WR_INV_IDLE = 0x00000055, - SDMA_PERFMON_SEL_L1_RD_INV_IDLE = 0x00000056, - SDMA_PERFMON_SEL_META_L2_REQ_SEND = 0x00000057, - SDMA_PERFMON_SEL_L2_META_RET_VLD = 0x00000058, - SDMA_PERFMON_SEL_SDMA_UTCL2_RD_SEND = 0x00000059, - SDMA_PERFMON_SEL_UTCL2_SDMA_RD_RTN = 0x0000005a, - SDMA_PERFMON_SEL_SDMA_UTCL2_WR_SEND = 0x0000005b, - SDMA_PERFMON_SEL_UTCL2_SDMA_WR_RTN = 0x0000005c, - SDMA_PERFMON_SEL_META_REQ_SEND = 0x0000005d, - SDMA_PERFMON_SEL_META_RTN_VLD = 0x0000005e, - SDMA_PERFMON_SEL_TLBI_SEND = 0x0000005f, - SDMA_PERFMON_SEL_TLBI_RTN = 0x00000060, - SDMA_PERFMON_SEL_GCR_SEND = 0x00000061, - SDMA_PERFMON_SEL_GCR_RTN = 0x00000062, - SDMA_PERFMON_SEL_UTCL1_TAG_DELAY_COUNTER = 0x00000063, - SDMA_PERFMON_SEL_MMHUB_TAG_DELAY_COUNTER = 0x00000064, -} SDMA_PERFMON_SEL; -#endif - -typedef enum SDMA_PERF_SEL { - SDMA_PERF_SEL_CYCLE = 0x00000000, - SDMA_PERF_SEL_IDLE = 0x00000001, - SDMA_PERF_SEL_REG_IDLE = 0x00000002, - SDMA_PERF_SEL_RB_EMPTY = 0x00000003, - SDMA_PERF_SEL_RB_FULL = 0x00000004, - SDMA_PERF_SEL_RB_WPTR_WRAP = 0x00000005, - SDMA_PERF_SEL_RB_RPTR_WRAP = 0x00000006, - SDMA_PERF_SEL_RB_WPTR_POLL_READ = 0x00000007, - SDMA_PERF_SEL_RB_RPTR_WB = 0x00000008, - SDMA_PERF_SEL_RB_CMD_IDLE = 0x00000009, - SDMA_PERF_SEL_RB_CMD_FULL = 0x0000000a, - SDMA_PERF_SEL_IB_CMD_IDLE = 0x0000000b, - SDMA_PERF_SEL_IB_CMD_FULL = 0x0000000c, - SDMA_PERF_SEL_EX_IDLE = 0x0000000d, - SDMA_PERF_SEL_SRBM_REG_SEND = 0x0000000e, - SDMA_PERF_SEL_EX_IDLE_POLL_TIMER_EXPIRE = 0x0000000f, - SDMA_PERF_SEL_MC_WR_IDLE = 0x00000010, - SDMA_PERF_SEL_MC_WR_COUNT = 0x00000011, - SDMA_PERF_SEL_MC_RD_IDLE = 0x00000012, - SDMA_PERF_SEL_MC_RD_COUNT = 0x00000013, - SDMA_PERF_SEL_MC_RD_RET_STALL = 0x00000014, - SDMA_PERF_SEL_MC_RD_NO_POLL_IDLE = 0x00000015, - SDMA_PERF_SEL_DRM_IDLE = 0x00000016, - SDMA_PERF_SEL_DRM_REQ_STALL = 0x00000017, - SDMA_PERF_SEL_SEM_IDLE = 0x00000018, - SDMA_PERF_SEL_SEM_REQ_STALL = 0x00000019, - 
SDMA_PERF_SEL_SEM_REQ_COUNT = 0x0000001a, - SDMA_PERF_SEL_SEM_RESP_INCOMPLETE = 0x0000001b, - SDMA_PERF_SEL_SEM_RESP_FAIL = 0x0000001c, - SDMA_PERF_SEL_SEM_RESP_PASS = 0x0000001d, - SDMA_PERF_SEL_INT_IDLE = 0x0000001e, - SDMA_PERF_SEL_INT_REQ_STALL = 0x0000001f, - SDMA_PERF_SEL_INT_REQ_COUNT = 0x00000020, - SDMA_PERF_SEL_INT_RESP_ACCEPTED = 0x00000021, - SDMA_PERF_SEL_INT_RESP_RETRY = 0x00000022, - SDMA_PERF_SEL_NUM_PACKET = 0x00000023, - SDMA_PERF_SEL_DRM1_REQ_STALL = 0x00000024, - SDMA_PERF_SEL_CE_WREQ_IDLE = 0x00000025, - SDMA_PERF_SEL_CE_WR_IDLE = 0x00000026, - SDMA_PERF_SEL_CE_SPLIT_IDLE = 0x00000027, - SDMA_PERF_SEL_CE_RREQ_IDLE = 0x00000028, - SDMA_PERF_SEL_CE_OUT_IDLE = 0x00000029, - SDMA_PERF_SEL_CE_IN_IDLE = 0x0000002a, - SDMA_PERF_SEL_CE_DST_IDLE = 0x0000002b, - SDMA_PERF_SEL_CE_DRM_IDLE = 0x0000002c, - SDMA_PERF_SEL_CE_DRM1_IDLE = 0x0000002d, - SDMA_PERF_SEL_CE_AFIFO_FULL = 0x0000002e, - SDMA_PERF_SEL_CE_INFO_FULL = 0x00000031, - SDMA_PERF_SEL_CE_INFO1_FULL = 0x00000032, - SDMA_PERF_SEL_CE_RD_STALL = 0x00000033, - SDMA_PERF_SEL_CE_WR_STALL = 0x00000034, - SDMA_PERF_SEL_CTX_CHANGE = 0x00000039, - SDMA_PERF_SEL_CTX_CHANGE_EXPIRED = 0x0000003a, - SDMA_PERF_SEL_CTX_CHANGE_EXCEPTION = 0x0000003b, - SDMA_PERF_SEL_DOORBELL = 0x0000003c, - SDMA_PERF_SEL_RD_BA_RTR = 0x0000003d, - SDMA_PERF_SEL_WR_BA_RTR = 0x0000003e, - SDMA_PERF_SEL_F32_L1_WR_VLD = 0x0000003f, - SDMA_PERF_SEL_CE_L1_WR_VLD = 0x00000040, - SDMA_PERF_SEL_CE_DRM_FULL__GFX09 = 0x0000002f, - SDMA_PERF_SEL_CE_DRM1_FULL__GFX09 = 0x00000030, - SDMA_PERF_SEL_GFX_SELECT__GFX09 = 0x00000035, - SDMA_PERF_SEL_RLC0_SELECT__GFX09 = 0x00000036, - SDMA_PERF_SEL_RLC1_SELECT__GFX09 = 0x00000037, - SDMA_PERF_SEL_PAGE_SELECT__GFX09 = 0x00000038, - SDMA_PERF_SEL_CE_L1_STALL__GFX09 = 0x00000041, - SDMA_PERF_SEL_SDMA_INVACK_NFLUSH__GFX09 = 0x00000042, - SDMA_PERF_SEL_SDMA_INVACK_FLUSH__GFX09 = 0x00000043, - SDMA_PERF_SEL_ATCL2_INVREQ_NFLUSH__GFX09 = 0x00000044, - SDMA_PERF_SEL_ATCL2_INVREQ_FLUSH__GFX09 = 0x00000045, - 
SDMA_PERF_SEL_ATCL2_RET_XNACK__GFX09 = 0x00000046, - SDMA_PERF_SEL_ATCL2_RET_ACK__GFX09 = 0x00000047, - SDMA_PERF_SEL_ATCL2_FREE__GFX09 = 0x00000048, - SDMA_PERF_SEL_SDMA_ATCL2_SEND__GFX09 = 0x00000049, - SDMA_PERF_SEL_DMA_L1_WR_SEND__GFX09 = 0x0000004a, - SDMA_PERF_SEL_DMA_L1_RD_SEND__GFX09 = 0x0000004b, - SDMA_PERF_SEL_DMA_MC_WR_SEND__GFX09 = 0x0000004c, - SDMA_PERF_SEL_DMA_MC_RD_SEND__GFX09 = 0x0000004d, - SDMA_PERF_SEL_L1_WR_FIFO_IDLE__GFX09 = 0x0000004e, - SDMA_PERF_SEL_L1_RD_FIFO_IDLE__GFX09 = 0x0000004f, - SDMA_PERF_SEL_L1_WRL2_IDLE__GFX09 = 0x00000050, - SDMA_PERF_SEL_L1_RDL2_IDLE__GFX09 = 0x00000051, - SDMA_PERF_SEL_L1_WRMC_IDLE__GFX09 = 0x00000052, - SDMA_PERF_SEL_L1_RDMC_IDLE__GFX09 = 0x00000053, - SDMA_PERF_SEL_L1_WR_INV_IDLE__GFX09 = 0x00000054, - SDMA_PERF_SEL_L1_RD_INV_IDLE__GFX09 = 0x00000055, - SDMA_PERF_SEL_L1_WR_INV_EN__GFX09 = 0x00000056, - SDMA_PERF_SEL_L1_RD_INV_EN__GFX09 = 0x00000057, - SDMA_PERF_SEL_L1_WR_WAIT_INVADR__GFX09 = 0x00000058, - SDMA_PERF_SEL_L1_RD_WAIT_INVADR__GFX09 = 0x00000059, - SDMA_PERF_SEL_IS_INVREQ_ADDR_WR__GFX09 = 0x0000005a, - SDMA_PERF_SEL_IS_INVREQ_ADDR_RD__GFX09 = 0x0000005b, - SDMA_PERF_SEL_L1_WR_XNACK_TIMEOUT__GFX09 = 0x0000005c, - SDMA_PERF_SEL_L1_RD_XNACK_TIMEOUT__GFX09 = 0x0000005d, - SDMA_PERF_SEL_L1_INV_MIDDLE__GFX09 = 0x0000005e, - SDMA_PERF_SEL_UTCL1_TAG_DELAY_COUNTER__GFX09 = 0x000000fe, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09 = 0x000000ff, - SDMA_PERF_SEL_CE_CH_WR_REQ__GFX103 = 0x00000065, - SDMA_PERF_SEL_CE_CH_WR_RET__GFX103 = 0x00000066, - SDMA_PERF_SEL_F32_CH_WR_REQ__GFX103 = 0x00000067, - SDMA_PERF_SEL_F32_CH_WR_RET__GFX103 = 0x00000068, - SDMA_PERF_SEL_CE_OR_F32_CH_RD_REQ__GFX103 = 0x00000069, - SDMA_PERF_SEL_CE_OR_F32_CH_RD_RET__GFX103 = 0x0000006a, - SDMA_PERF_SEL_RB_CH_RD_REQ__GFX103 = 0x0000006b, - SDMA_PERF_SEL_RB_CH_RD_RET__GFX103 = 0x0000006c, - SDMA_PERF_SEL_IB_CH_RD_REQ__GFX103 = 0x0000006d, - SDMA_PERF_SEL_IB_CH_RD_RET__GFX103 = 0x0000006e, - SDMA_PERF_SEL_WPTR_CH_RD_REQ__GFX103 = 
0x0000006f, - SDMA_PERF_SEL_WPTR_CH_RD_RET__GFX103 = 0x00000070, - SDMA_PERF_SEL_UTCL1_UTCL2_REQ__GFX103 = 0x00000071, - SDMA_PERF_SEL_UTCL1_UTCL2_RET__GFX103 = 0x00000072, - SDMA_PERF_SEL_CMD_OP_MATCH__GFX103 = 0x00000073, - SDMA_PERF_SEL_CMD_OP_START__GFX103 = 0x00000074, - SDMA_PERF_SEL_CMD_OP_END__GFX103 = 0x00000075, - SDMA_PERF_SEL_CE_BUSY__GFX103 = 0x00000076, - SDMA_PERF_SEL_CE_BUSY_START__GFX103 = 0x00000077, - SDMA_PERF_SEL_CE_BUSY_END__GFX103 = 0x00000078, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER__GFX103 = 0x00000079, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER_START__GFX103 = 0x0000007a, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER_END__GFX103 = 0x0000007b, - SDMA_PERF_SEL_CE_CH_WRREQ_SEND__GFX103 = 0x0000007c, - SDMA_PERF_SEL_CH_CE_WRRET_VALID__GFX103 = 0x0000007d, - SDMA_PERF_SEL_CE_CH_RDREQ_SEND__GFX103 = 0x0000007e, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103 = 0x0000007f, - SDMA_PERF_SEL_GPUVM_INV_HIGH__GFX103COREPLUS = 0x0000004f, - SDMA_PERF_SEL_GPUVM_INV_LOW__GFX103COREPLUS = 0x00000050, - SDMA_PERF_SEL_CE_DRM_FULL__GFX10CORE = 0x0000002f, - SDMA_PERF_SEL_CE_DRM1_FULL__GFX10CORE = 0x00000030, - SDMA_PERF_SEL_GFX_SELECT__GFX10CORE = 0x00000035, - SDMA_PERF_SEL_RLC0_SELECT__GFX10CORE = 0x00000036, - SDMA_PERF_SEL_RLC1_SELECT__GFX10CORE = 0x00000037, - SDMA_PERF_SEL_PAGE_SELECT__GFX10CORE = 0x00000038, - SDMA_PERF_SEL_UTCL1_TAG_DELAY_COUNTER__GFX10CORE = 0x00000063, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX10CORE = 0x00000064, - SDMA_PERF_SEL_CPF_SDMA_INVREQ__GFX10COREPLUS = 0x00000041, - SDMA_PERF_SEL_SDMA_CPF_INVACK__GFX10COREPLUS = 0x00000042, - SDMA_PERF_SEL_UTCL2_SDMA_INVREQ__GFX10COREPLUS = 0x00000043, - SDMA_PERF_SEL_SDMA_UTCL2_INVACK__GFX10COREPLUS = 0x00000044, - SDMA_PERF_SEL_UTCL2_SDMA_INVREQ_ALL__GFX10COREPLUS = 0x00000045, - SDMA_PERF_SEL_SDMA_UTCL2_INVACK_ALL__GFX10COREPLUS = 0x00000046, - SDMA_PERF_SEL_UTCL2_RET_XNACK__GFX10COREPLUS = 0x00000047, - SDMA_PERF_SEL_UTCL2_RET_ACK__GFX10COREPLUS = 0x00000048, - SDMA_PERF_SEL_UTCL2_FREE__GFX10COREPLUS = 
0x00000049, - SDMA_PERF_SEL_SDMA_UTCL2_SEND__GFX10COREPLUS = 0x0000004a, - SDMA_PERF_SEL_DMA_L1_WR_SEND__GFX10COREPLUS = 0x0000004b, - SDMA_PERF_SEL_DMA_L1_RD_SEND__GFX10COREPLUS = 0x0000004c, - SDMA_PERF_SEL_DMA_MC_WR_SEND__GFX10COREPLUS = 0x0000004d, - SDMA_PERF_SEL_DMA_MC_RD_SEND__GFX10COREPLUS = 0x0000004e, - SDMA_PERF_SEL_L1_WRL2_IDLE__GFX10COREPLUS = 0x00000051, - SDMA_PERF_SEL_L1_RDL2_IDLE__GFX10COREPLUS = 0x00000052, - SDMA_PERF_SEL_L1_WRMC_IDLE__GFX10COREPLUS = 0x00000053, - SDMA_PERF_SEL_L1_RDMC_IDLE__GFX10COREPLUS = 0x00000054, - SDMA_PERF_SEL_L1_WR_INV_IDLE__GFX10COREPLUS = 0x00000055, - SDMA_PERF_SEL_L1_RD_INV_IDLE__GFX10COREPLUS = 0x00000056, - SDMA_PERF_SEL_META_L2_REQ_SEND__GFX10COREPLUS = 0x00000057, - SDMA_PERF_SEL_L2_META_RET_VLD__GFX10COREPLUS = 0x00000058, - SDMA_PERF_SEL_SDMA_UTCL2_RD_SEND__GFX10COREPLUS = 0x00000059, - SDMA_PERF_SEL_UTCL2_SDMA_RD_RTN__GFX10COREPLUS = 0x0000005a, - SDMA_PERF_SEL_SDMA_UTCL2_WR_SEND__GFX10COREPLUS = 0x0000005b, - SDMA_PERF_SEL_UTCL2_SDMA_WR_RTN__GFX10COREPLUS = 0x0000005c, - SDMA_PERF_SEL_META_REQ_SEND__GFX10COREPLUS = 0x0000005d, - SDMA_PERF_SEL_META_RTN_VLD__GFX10COREPLUS = 0x0000005e, - SDMA_PERF_SEL_TLBI_SEND__GFX10COREPLUS = 0x0000005f, - SDMA_PERF_SEL_TLBI_RTN__GFX10COREPLUS = 0x00000060, - SDMA_PERF_SEL_GCR_SEND__GFX10COREPLUS = 0x00000061, - SDMA_PERF_SEL_GCR_RTN__GFX10COREPLUS = 0x00000062, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SDMA_PERF_SEL_DUMMY_0__GFX11 = 0x0000002f, - SDMA_PERF_SEL_DUMMY_1__GFX11 = 0x00000030, - SDMA_PERF_SEL_QUEUE0_SELECT__GFX11 = 0x00000035, - SDMA_PERF_SEL_QUEUE1_SELECT__GFX11 = 0x00000036, - SDMA_PERF_SEL_QUEUE2_SELECT__GFX11 = 0x00000037, - SDMA_PERF_SEL_QUEUE3_SELECT__GFX11 = 0x00000038, - SDMA_PERF_SEL_CGCG_FENCE__GFX11 = 0x00000063, - SDMA_PERF_SEL_CE_CH_WR_REQ__GFX11 = 0x00000064, - SDMA_PERF_SEL_CE_CH_WR_RET__GFX11 = 0x00000065, - SDMA_PERF_SEL_F32_CH_WR_REQ__GFX11 = 0x00000066, - SDMA_PERF_SEL_F32_CH_WR_RET__GFX11 = 0x00000067, - 
SDMA_PERF_SEL_CE_OR_F32_CH_RD_REQ__GFX11 = 0x00000068, - SDMA_PERF_SEL_CE_OR_F32_CH_RD_RET__GFX11 = 0x00000069, - SDMA_PERF_SEL_RB_CH_RD_REQ__GFX11 = 0x0000006a, - SDMA_PERF_SEL_RB_CH_RD_RET__GFX11 = 0x0000006b, - SDMA_PERF_SEL_IB_CH_RD_REQ__GFX11 = 0x0000006c, - SDMA_PERF_SEL_IB_CH_RD_RET__GFX11 = 0x0000006d, - SDMA_PERF_SEL_WPTR_CH_RD_REQ__GFX11 = 0x0000006e, - SDMA_PERF_SEL_WPTR_CH_RD_RET__GFX11 = 0x0000006f, - SDMA_PERF_SEL_UTCL1_UTCL2_REQ__GFX11 = 0x00000070, - SDMA_PERF_SEL_UTCL1_UTCL2_RET__GFX11 = 0x00000071, - SDMA_PERF_SEL_CMD_OP_MATCH__GFX11 = 0x00000072, - SDMA_PERF_SEL_CMD_OP_START__GFX11 = 0x00000073, - SDMA_PERF_SEL_CMD_OP_END__GFX11 = 0x00000074, - SDMA_PERF_SEL_CE_BUSY__GFX11 = 0x00000075, - SDMA_PERF_SEL_CE_BUSY_START__GFX11 = 0x00000076, - SDMA_PERF_SEL_CE_BUSY_END__GFX11 = 0x00000077, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER__GFX11 = 0x00000078, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER_START__GFX11 = 0x00000079, - SDMA_PERF_SEL_F32_PERFCNT_TRIGGER_END__GFX11 = 0x0000007a, - SDMA_PERF_SEL_CE_CH_WRREQ_SEND__GFX11 = 0x0000007b, - SDMA_PERF_SEL_CH_CE_WRRET_VALID__GFX11 = 0x0000007c, - SDMA_PERF_SEL_CE_CH_RDREQ_SEND__GFX11 = 0x0000007d, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX11 = 0x0000007e, - SDMA_PERF_SEL_QUEUE4_SELECT__GFX11 = 0x0000007f, - SDMA_PERF_SEL_QUEUE5_SELECT__GFX11 = 0x00000080, - SDMA_PERF_SEL_QUEUE6_SELECT__GFX11 = 0x00000081, - SDMA_PERF_SEL_QUEUE7_SELECT__GFX11 = 0x00000082, -#endif - SDMA_PERF_SEL_GPUVM_INVREQ_HIGH__OSS50 = 0x0000004f, - SDMA_PERF_SEL_GPUVM_INVREQ_LOW__OSS50 = 0x00000050, -} SDMA_PERF_SEL; - -constexpr unsigned int MaxSdmaPerfSelGfx09 = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09; -constexpr unsigned int MaxSdmaPerfSelOss50 = SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX10CORE; -constexpr unsigned int MaxSdmaPerfSelGfx103 = SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxSdmaPerfSelGfx11 = SDMA_PERF_SEL_QUEUE7_SELECT__GFX11; -#endif - 
-typedef enum SH_MEM_ADDRESS_MODE { - SH_MEM_ADDRESS_MODE_64 = 0x00000000, - SH_MEM_ADDRESS_MODE_32 = 0x00000001, -} SH_MEM_ADDRESS_MODE; - -typedef enum SH_MEM_ALIGNMENT_MODE { - SH_MEM_ALIGNMENT_MODE_DWORD = 0x00000000, - SH_MEM_ALIGNMENT_MODE_DWORD_STRICT = 0x00000001, - SH_MEM_ALIGNMENT_MODE_STRICT = 0x00000002, - SH_MEM_ALIGNMENT_MODE_UNALIGNED__CORE = 0x00000003, -} SH_MEM_ALIGNMENT_MODE; - -typedef enum SH_MEM_RETRY_MODE { - SH_MEM_RETRY_MODE_ALL__GFX101 = 0x00000000, - SH_MEM_RETRY_MODE_WRITEATOMIC__GFX101 = 0x00000001, - SH_MEM_RETRY_MODE_NONE__GFX101 = 0x00000002, -} SH_MEM_RETRY_MODE; - -typedef enum SPI_FOG_MODE { - SPI_FOG_NONE = 0x00000000, - SPI_FOG_EXP = 0x00000001, - SPI_FOG_EXP2 = 0x00000002, - SPI_FOG_LINEAR = 0x00000003, -} SPI_FOG_MODE; - -typedef enum SPI_LB_WAVES_SELECT { - HS_GS = 0x00000000, - CS_NA = 0x00000002, - SPI_LB_WAVES_RSVD = 0x00000003, - VS_PS__GFX10 = 0x00000001, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - PS__GFX11 = 0x00000001, -#endif -} SPI_LB_WAVES_SELECT; - -typedef enum SPI_PERFCNT_SEL { -#if CHIP_HDR_PHOENIX1 - SPI_PERF_GS_GRP_LIFETIME_SAMPLE__APU11 = 0x00000012, -#endif - SPI_PERF_VS_PC_STALL__GFX09 = 0x00000005, - SPI_PERF_VS_POS0_STALL__GFX09 = 0x00000006, - SPI_PERF_VS_POS1_STALL__GFX09 = 0x00000007, - SPI_PERF_VS_CRAWLER_STALL__GFX09 = 0x00000008, - SPI_PERF_VS_EVENT_WAVE__GFX09 = 0x00000009, - SPI_PERF_VS_WAVE__GFX09 = 0x0000000a, - SPI_PERF_VS_PERS_UPD_FULL0__GFX09 = 0x0000000b, - SPI_PERF_VS_PERS_UPD_FULL1__GFX09 = 0x0000000c, - SPI_PERF_VS_LATE_ALLOC_FULL__GFX09 = 0x0000000d, - SPI_PERF_VS_FIRST_SUBGRP__GFX09 = 0x0000000e, - SPI_PERF_VS_LAST_SUBGRP__GFX09 = 0x0000000f, - SPI_PERF_VS_ALLOC_CNT__GFX09 = 0x00000010, - SPI_PERF_VS_PC_ALLOC_CNT__GFX09 = 0x00000011, - SPI_PERF_VS_LATE_ALLOC_ACCUM__GFX09 = 0x00000012, - SPI_PERF_GS_WINDOW_VALID__GFX09 = 0x00000013, - SPI_PERF_GS_BUSY__GFX09 = 0x00000014, - SPI_PERF_GS_CRAWLER_STALL__GFX09 = 0x00000015, - SPI_PERF_GS_EVENT_WAVE__GFX09 
= 0x00000016, - SPI_PERF_GS_WAVE__GFX09 = 0x00000017, - SPI_PERF_GS_PERS_UPD_FULL0__GFX09 = 0x00000018, - SPI_PERF_GS_PERS_UPD_FULL1__GFX09 = 0x00000019, - SPI_PERF_GS_FIRST_SUBGRP__GFX09 = 0x0000001a, - SPI_PERF_GS_LAST_SUBGRP__GFX09 = 0x0000001b, - SPI_PERF_GS_HS_DEALLOC__GFX09 = 0x0000001c, - SPI_PERF_GS_NGG_SE_LATE_ALLOC_LIMIT__GFX09 = 0x0000001d, - SPI_PERF_GS_GRP_FIFO_FULL__GFX09 = 0x0000001e, - SPI_PERF_HS_WINDOW_VALID__GFX09 = 0x0000001f, - SPI_PERF_HS_BUSY__GFX09 = 0x00000020, - SPI_PERF_HS_CRAWLER_STALL__GFX09 = 0x00000021, - SPI_PERF_HS_FIRST_WAVE__GFX09 = 0x00000022, - SPI_PERF_HS_LAST_WAVE__GFX09 = 0x00000023, - SPI_PERF_HS_OFFCHIP_LDS_STALL__GFX09 = 0x00000024, - SPI_PERF_HS_EVENT_WAVE__GFX09 = 0x00000025, - SPI_PERF_HS_WAVE__GFX09 = 0x00000026, - SPI_PERF_HS_PERS_UPD_FULL0__GFX09 = 0x00000027, - SPI_PERF_HS_PERS_UPD_FULL1__GFX09 = 0x00000028, - SPI_PERF_CSG_WINDOW_VALID__GFX09 = 0x00000029, - SPI_PERF_CSG_BUSY__GFX09 = 0x0000002a, - SPI_PERF_CSG_NUM_THREADGROUPS__GFX09 = 0x0000002b, - SPI_PERF_CSG_CRAWLER_STALL__GFX09 = 0x0000002c, - SPI_PERF_CSG_EVENT_WAVE__GFX09 = 0x0000002d, - SPI_PERF_CSG_WAVE__GFX09 = 0x0000002e, - SPI_PERF_CSN_WINDOW_VALID__GFX09 = 0x0000002f, - SPI_PERF_CSN_BUSY__GFX09 = 0x00000030, - SPI_PERF_CSN_NUM_THREADGROUPS__GFX09 = 0x00000031, - SPI_PERF_CSN_CRAWLER_STALL__GFX09 = 0x00000032, - SPI_PERF_CSN_EVENT_WAVE__GFX09 = 0x00000033, - SPI_PERF_CSN_WAVE__GFX09 = 0x00000034, - SPI_PERF_PS_WINDOW_VALID__GFX09 = 0x00000035, - SPI_PERF_PS_BUSY__GFX09 = 0x00000036, - SPI_PERF_PS_ACTIVE__GFX09 = 0x00000037, - SPI_PERF_PS_DEALLOC_BIN0__GFX09 = 0x00000038, - SPI_PERF_PS_FPOS_BIN1_STALL__GFX09 = 0x00000039, - SPI_PERF_PS_EVENT_WAVE__GFX09 = 0x0000003a, - SPI_PERF_PS_WAVE__GFX09 = 0x0000003b, - SPI_PERF_PS_OPT_WAVE__GFX09 = 0x0000003c, - SPI_PERF_PS_PASS_BIN0__GFX09 = 0x0000003d, - SPI_PERF_PS_PASS_BIN1__GFX09 = 0x0000003e, - SPI_PERF_PS_FPOS_BIN2__GFX09 = 0x0000003f, - SPI_PERF_PS_PRIM_BIN0__GFX09 = 0x00000040, - 
SPI_PERF_PS_PRIM_BIN1__GFX09 = 0x00000041, - SPI_PERF_PS_CNF_BIN2__GFX09 = 0x00000042, - SPI_PERF_PS_CNF_BIN3__GFX09 = 0x00000043, - SPI_PERF_PS_CRAWLER_STALL__GFX09 = 0x00000044, - SPI_PERF_PS_LDS_RES_FULL__GFX09 = 0x00000045, - SPI_PERF_PS_PERS_UPD_FULL0__GFX09 = 0x00000046, - SPI_PERF_PS_PERS_UPD_FULL1__GFX09 = 0x00000047, - SPI_PERF_PS_POPS_WAVE_SENT__GFX09 = 0x00000048, - SPI_PERF_PS_POPS_WAVE_EXIT__GFX09 = 0x00000049, - SPI_PERF_LDS0_PC_VALID__GFX09 = 0x0000004a, - SPI_PERF_LDS1_PC_VALID__GFX09 = 0x0000004b, - SPI_PERF_RA_PIPE_REQ_BIN2__GFX09 = 0x0000004c, - SPI_PERF_RA_TASK_REQ_BIN3__GFX09 = 0x0000004d, - SPI_PERF_RA_WR_CTL_FULL__GFX09 = 0x0000004e, - SPI_PERF_RA_REQ_NO_ALLOC__GFX09 = 0x0000004f, - SPI_PERF_RA_REQ_NO_ALLOC_PS__GFX09 = 0x00000050, - SPI_PERF_RA_REQ_NO_ALLOC_VS__GFX09 = 0x00000051, - SPI_PERF_RA_REQ_NO_ALLOC_GS__GFX09 = 0x00000052, - SPI_PERF_RA_REQ_NO_ALLOC_HS__GFX09 = 0x00000053, - SPI_PERF_RA_REQ_NO_ALLOC_CSG__GFX09 = 0x00000054, - SPI_PERF_RA_REQ_NO_ALLOC_CSN__GFX09 = 0x00000055, - SPI_PERF_RA_RES_STALL_PS__GFX09 = 0x00000056, - SPI_PERF_RA_RES_STALL_VS__GFX09 = 0x00000057, - SPI_PERF_RA_RES_STALL_GS__GFX09 = 0x00000058, - SPI_PERF_RA_RES_STALL_HS__GFX09 = 0x00000059, - SPI_PERF_RA_RES_STALL_CSG__GFX09 = 0x0000005a, - SPI_PERF_RA_RES_STALL_CSN__GFX09 = 0x0000005b, - SPI_PERF_RA_TMP_STALL_PS__GFX09 = 0x0000005c, - SPI_PERF_RA_TMP_STALL_VS__GFX09 = 0x0000005d, - SPI_PERF_RA_TMP_STALL_GS__GFX09 = 0x0000005e, - SPI_PERF_RA_TMP_STALL_HS__GFX09 = 0x0000005f, - SPI_PERF_RA_TMP_STALL_CSG__GFX09 = 0x00000060, - SPI_PERF_RA_TMP_STALL_CSN__GFX09 = 0x00000061, - SPI_PERF_RA_WAVE_SIMD_FULL_PS__GFX09 = 0x00000062, - SPI_PERF_RA_WAVE_SIMD_FULL_VS__GFX09 = 0x00000063, - SPI_PERF_RA_WAVE_SIMD_FULL_GS__GFX09 = 0x00000064, - SPI_PERF_RA_WAVE_SIMD_FULL_HS__GFX09 = 0x00000065, - SPI_PERF_RA_WAVE_SIMD_FULL_CSG__GFX09 = 0x00000066, - SPI_PERF_RA_WAVE_SIMD_FULL_CSN__GFX09 = 0x00000067, - SPI_PERF_RA_VGPR_SIMD_FULL_PS__GFX09 = 0x00000068, - 
SPI_PERF_RA_VGPR_SIMD_FULL_VS__GFX09 = 0x00000069, - SPI_PERF_RA_VGPR_SIMD_FULL_GS__GFX09 = 0x0000006a, - SPI_PERF_RA_VGPR_SIMD_FULL_HS__GFX09 = 0x0000006b, - SPI_PERF_RA_VGPR_SIMD_FULL_CSG__GFX09 = 0x0000006c, - SPI_PERF_RA_VGPR_SIMD_FULL_CSN__GFX09 = 0x0000006d, - SPI_PERF_RA_SGPR_SIMD_FULL_PS__GFX09 = 0x0000006e, - SPI_PERF_RA_SGPR_SIMD_FULL_VS__GFX09 = 0x0000006f, - SPI_PERF_RA_SGPR_SIMD_FULL_GS__GFX09 = 0x00000070, - SPI_PERF_RA_SGPR_SIMD_FULL_HS__GFX09 = 0x00000071, - SPI_PERF_RA_SGPR_SIMD_FULL_CSG__GFX09 = 0x00000072, - SPI_PERF_RA_SGPR_SIMD_FULL_CSN__GFX09 = 0x00000073, - SPI_PERF_RA_LDS_CU_FULL_PS__GFX09 = 0x00000074, - SPI_PERF_RA_LDS_CU_FULL_LS__GFX09 = 0x00000075, - SPI_PERF_RA_LDS_CU_FULL_ES__GFX09 = 0x00000076, - SPI_PERF_RA_LDS_CU_FULL_CSG__GFX09 = 0x00000077, - SPI_PERF_RA_LDS_CU_FULL_CSN__GFX09 = 0x00000078, - SPI_PERF_RA_BAR_CU_FULL_HS__GFX09 = 0x00000079, - SPI_PERF_RA_BAR_CU_FULL_CSG__GFX09 = 0x0000007a, - SPI_PERF_RA_BAR_CU_FULL_CSN__GFX09 = 0x0000007b, - SPI_PERF_RA_BULKY_CU_FULL_CSG__GFX09 = 0x0000007c, - SPI_PERF_RA_BULKY_CU_FULL_CSN__GFX09 = 0x0000007d, - SPI_PERF_RA_TGLIM_CU_FULL_CSG__GFX09 = 0x0000007e, - SPI_PERF_RA_TGLIM_CU_FULL_CSN__GFX09 = 0x0000007f, - SPI_PERF_RA_WVLIM_STALL_PS__GFX09 = 0x00000080, - SPI_PERF_RA_WVLIM_STALL_VS__GFX09 = 0x00000081, - SPI_PERF_RA_WVLIM_STALL_GS__GFX09 = 0x00000082, - SPI_PERF_RA_WVLIM_STALL_HS__GFX09 = 0x00000083, - SPI_PERF_RA_WVLIM_STALL_CSG__GFX09 = 0x00000084, - SPI_PERF_RA_WVLIM_STALL_CSN__GFX09 = 0x00000085, - SPI_PERF_RA_VS_LOCK__GFX09 = 0x00000086, - SPI_PERF_RA_GS_LOCK__GFX09 = 0x00000087, - SPI_PERF_RA_HS_LOCK__GFX09 = 0x00000088, - SPI_PERF_RA_CSG_LOCK__GFX09 = 0x00000089, - SPI_PERF_RA_CSN_LOCK__GFX09 = 0x0000008a, - SPI_PERF_RA_RSV_UPD__GFX09 = 0x0000008b, - SPI_PERF_EXP_ARB_COL_CNT__GFX09 = 0x0000008c, - SPI_PERF_EXP_ARB_PAR_CNT__GFX09 = 0x0000008d, - SPI_PERF_EXP_ARB_POS_CNT__GFX09 = 0x0000008e, - SPI_PERF_EXP_ARB_GDS_CNT__GFX09 = 0x0000008f, - SPI_PERF_NUM_PS_COL_R0_EXPORTS__GFX09 = 
0x00000090, - SPI_PERF_NUM_PS_COL_R1_EXPORTS__GFX09 = 0x00000091, - SPI_PERF_NUM_VS_POS_R0_EXPORTS__GFX09 = 0x00000092, - SPI_PERF_NUM_VS_POS_R1_EXPORTS__GFX09 = 0x00000093, - SPI_PERF_NUM_VS_PARAM_R0_EXPORTS__GFX09 = 0x00000094, - SPI_PERF_NUM_VS_PARAM_R1_EXPORTS__GFX09 = 0x00000095, - SPI_PERF_NUM_VS_GDS_R0_EXPORTS__GFX09 = 0x00000096, - SPI_PERF_NUM_VS_GDS_R1_EXPORTS__GFX09 = 0x00000097, - SPI_PERF_NUM_EXPGRANT_EXPORTS__GFX09 = 0x00000098, - SPI_PERF_CLKGATE_BUSY_STALL__GFX09 = 0x00000099, - SPI_PERF_CLKGATE_ACTIVE_STALL__GFX09 = 0x0000009a, - SPI_PERF_CLKGATE_ALL_CLOCKS_ON__GFX09 = 0x0000009b, - SPI_PERF_CLKGATE_CGTT_DYN_ON__GFX09 = 0x0000009c, - SPI_PERF_CLKGATE_CGTT_REG_ON__GFX09 = 0x0000009d, - SPI_PERF_PIX_ALLOC_PEND_CNT__GFX09 = 0x0000009e, - SPI_PERF_PIX_ALLOC_SCB_STALL__GFX09 = 0x0000009f, - SPI_PERF_PIX_ALLOC_DB0_STALL__GFX09 = 0x000000a0, - SPI_PERF_PIX_ALLOC_DB1_STALL__GFX09 = 0x000000a1, - SPI_PERF_PIX_ALLOC_DB2_STALL__GFX09 = 0x000000a2, - SPI_PERF_PIX_ALLOC_DB3_STALL__GFX09 = 0x000000a3, - SPI_PERF_PC_ALLOC_ACCUM__GFX09 = 0x000000a4, - SPI_PERF_GS_NGG_SE_HAS_BATON__GFX09 = 0x000000a5, - SPI_PERF_GS_NGG_SE_DOES_NOT_HAVE_BATON__GFX09 = 0x000000a6, - SPI_PERF_GS_NGG_SE_FORWARDED_BATON__GFX09 = 0x000000a7, - SPI_PERF_GS_NGG_SE_AT_SYNC_EVENT__GFX09 = 0x000000a8, - SPI_PERF_GS_NGG_SE_SG_ALLOC_PC_SPACE_CNT__GFX09 = 0x000000a9, - SPI_PERF_GS_NGG_SE_DEALLOC_PC_SPACE_CNT__GFX09 = 0x000000aa, - SPI_PERF_GS_NGG_PC_FULL__GFX09 = 0x000000ab, - SPI_PERF_GS_NGG_SE_SEND_GS_ALLOC__GFX09 = 0x000000ac, - SPI_PERF_GS_NGG_GS_ALLOC_FIFO_EMPTY__GFX09 = 0x000000ad, - SPI_PERF_GSC_VTX_BUSY__GFX09 = 0x000000ae, - SPI_PERF_GSC_VTX_INPUT_STARVED__GFX09 = 0x000000af, - SPI_PERF_GSC_VTX_VSR_STALL__GFX09 = 0x000000b0, - SPI_PERF_GSC_VTX_VSR_FULL__GFX09 = 0x000000b1, - SPI_PERF_GSC_VTX_CAC_BUSY__GFX09 = 0x000000b2, - SPI_PERF_ESC_VTX_BUSY__GFX09 = 0x000000b3, - SPI_PERF_ESC_VTX_INPUT_STARVED__GFX09 = 0x000000b4, - SPI_PERF_ESC_VTX_VSR_STALL__GFX09 = 0x000000b5, - 
SPI_PERF_ESC_VTX_VSR_FULL__GFX09 = 0x000000b6, - SPI_PERF_ESC_VTX_CAC_BUSY__GFX09 = 0x000000b7, - SPI_PERF_SWC_PS_WR__GFX09 = 0x000000b8, - SPI_PERF_SWC_VS_WR__GFX09 = 0x000000b9, - SPI_PERF_SWC_GS_WR__GFX09 = 0x000000ba, - SPI_PERF_SWC_HS_WR__GFX09 = 0x000000bb, - SPI_PERF_SWC_CSG_WR__GFX09 = 0x000000bc, - SPI_PERF_SWC_CSC_WR__GFX09 = 0x000000bd, - SPI_PERF_VWC_PS_WR__GFX09 = 0x000000be, - SPI_PERF_VWC_VS_WR__GFX09 = 0x000000bf, - SPI_PERF_VWC_GS_WR__GFX09 = 0x000000c0, - SPI_PERF_VWC_HS_WR__GFX09 = 0x000000c1, - SPI_PERF_VWC_CSG_WR__GFX09 = 0x000000c2, - SPI_PERF_VWC_CSC_WR__GFX09 = 0x000000c3, - SPI_PERF_VS_POS0_STALL__GFX10 = 0x00000005, - SPI_PERF_VS_POS1_STALL__GFX10 = 0x00000006, - SPI_PERF_VS_CRAWLER_STALL__GFX10 = 0x00000007, - SPI_PERF_VS_EVENT_WAVE__GFX10 = 0x00000008, - SPI_PERF_VS_WAVE__GFX10 = 0x00000009, - SPI_PERF_VS_PERS_UPD_FULL0__GFX10 = 0x0000000a, - SPI_PERF_VS_PERS_UPD_FULL1__GFX10 = 0x0000000b, - SPI_PERF_VS_LATE_ALLOC_FULL__GFX10 = 0x0000000c, - SPI_PERF_VS_FIRST_SUBGRP__GFX10 = 0x0000000d, - SPI_PERF_VS_LAST_SUBGRP__GFX10 = 0x0000000e, - SPI_PERF_VS_ALLOC_CNT__GFX10 = 0x0000000f, - SPI_PERF_VS_LATE_ALLOC_ACCUM__GFX10 = 0x00000010, - SPI_PERF_GS_WINDOW_VALID__GFX10 = 0x00000011, - SPI_PERF_GS_BUSY__GFX10 = 0x00000012, - SPI_PERF_GS_CRAWLER_STALL__GFX10 = 0x00000013, - SPI_PERF_GS_EVENT_WAVE__GFX10 = 0x00000014, - SPI_PERF_GS_WAVE__GFX10 = 0x00000015, - SPI_PERF_GS_PERS_UPD_FULL0__GFX10 = 0x00000016, - SPI_PERF_GS_PERS_UPD_FULL1__GFX10 = 0x00000017, - SPI_PERF_GS_FIRST_SUBGRP__GFX10 = 0x00000018, - SPI_PERF_RA_REQ_NO_ALLOC_GS__GFX10 = 0x00000093, - SPI_PERF_RA_REQ_NO_ALLOC_ES__GFX101 = 0x00000092, - SPI_PERF_RA_REQ_NO_ALLOC_LS__GFX101 = 0x00000094, - SPI_PERF_RA_RES_STALL_ES__GFX101 = 0x0000009a, - SPI_PERF_RA_RES_STALL_LS__GFX101 = 0x0000009c, - SPI_PERF_RA_TMP_STALL_ES__GFX101 = 0x000000a2, - SPI_PERF_RA_TMP_STALL_LS__GFX101 = 0x000000a4, - SPI_PERF_RA_WAVE_SIMD_FULL_ES__GFX101 = 0x000000aa, - SPI_PERF_RA_WAVE_SIMD_FULL_LS__GFX101 = 
0x000000ac, - SPI_PERF_RA_VGPR_SIMD_FULL_ES__GFX101 = 0x000000b2, - SPI_PERF_RA_VGPR_SIMD_FULL_LS__GFX101 = 0x000000b5, - SPI_PERF_RA_SGPR_SIMD_FULL_PS__GFX101 = 0x000000b8, - SPI_PERF_RA_SGPR_SIMD_FULL_VS__GFX101 = 0x000000b9, - SPI_PERF_RA_SGPR_SIMD_FULL_ES__GFX101 = 0x000000ba, - SPI_PERF_RA_SGPR_SIMD_FULL_GS__GFX101 = 0x000000bb, - SPI_PERF_RA_SGPR_SIMD_FULL_LS__GFX101 = 0x000000bc, - SPI_PERF_RA_SGPR_SIMD_FULL_HS__GFX101 = 0x000000bd, - SPI_PERF_RA_SGPR_SIMD_FULL_CSG__GFX101 = 0x000000be, - SPI_PERF_RA_SGPR_SIMD_FULL_CSN__GFX101 = 0x000000bf, - SPI_PERF_RA_LDS_CU_FULL_LS__GFX101 = 0x000000c1, - SPI_PERF_RA_LDS_CU_FULL_ES__GFX101 = 0x000000c3, - SPI_PERF_RA_WVLIM_STALL_ES__GFX101 = 0x000000d0, - SPI_PERF_RA_WVLIM_STALL_LS__GFX101 = 0x000000d2, - SPI_PERF_RA_ES_LOCK__GFX101 = 0x000000d8, - SPI_PERF_RA_LS_LOCK__GFX101 = 0x000000da, - SPI_PERF_SWC_PS_WR__GFX101 = 0x00000123, - SPI_PERF_SWC_VS_WR__GFX101 = 0x00000124, - SPI_PERF_SWC_ES_WR__GFX101 = 0x00000125, - SPI_PERF_SWC_LS_WR__GFX101 = 0x00000127, - SPI_PERF_VWC_ES_WR__GFX101 = 0x0000012d, - SPI_PERF_VWC_LS_WR__GFX101 = 0x0000012f, - SPI_PERF_ES_WINDOW_VALID__GFX101 = 0x00000133, - SPI_PERF_ES_BUSY__GFX101 = 0x00000134, - SPI_PERF_ES_CRAWLER_STALL__GFX101 = 0x00000135, - SPI_PERF_ES_FIRST_WAVE__GFX101 = 0x00000136, - SPI_PERF_ES_LAST_WAVE__GFX101 = 0x00000137, - SPI_PERF_ES_LSHS_DEALLOC__GFX101 = 0x00000138, - SPI_PERF_ES_EVENT_WAVE__GFX101 = 0x00000139, - SPI_PERF_ES_WAVE__GFX101 = 0x0000013a, - SPI_PERF_ES_PERS_UPD_FULL0__GFX101 = 0x0000013b, - SPI_PERF_ES_PERS_UPD_FULL1__GFX101 = 0x0000013c, - SPI_PERF_ES_FIRST_SUBGRP__GFX101 = 0x0000013d, - SPI_PERF_ES_LAST_SUBGRP__GFX101 = 0x0000013e, - SPI_PERF_LS_WINDOW_VALID__GFX101 = 0x0000013f, - SPI_PERF_LS_BUSY__GFX101 = 0x00000140, - SPI_PERF_LS_CRAWLER_STALL__GFX101 = 0x00000141, - SPI_PERF_LS_FIRST_WAVE__GFX101 = 0x00000142, - SPI_PERF_LS_LAST_WAVE__GFX101 = 0x00000143, - SPI_PERF_LS_OFFCHIP_LDS_STALL__GFX101 = 0x00000144, - SPI_PERF_LS_EVENT_WAVE__GFX101 = 
0x00000145, - SPI_PERF_LS_WAVE__GFX101 = 0x00000146, - SPI_PERF_LS_PERS_UPD_FULL0__GFX101 = 0x00000147, - SPI_PERF_LS_PERS_UPD_FULL1__GFX101 = 0x00000148, - SPI_PERF_EXP_THROT_UPSTEP__GFX103 = 0x00000149, - SPI_PERF_EXP_THROT_DOWNSTEP__GFX103 = 0x0000014a, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103 = 0x0000014b, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SPI_PERF_PS0_WINDOW_VALID__GFX104PLUS = 0x00000035, - SPI_PERF_PS1_WINDOW_VALID__GFX104PLUS = 0x00000036, - SPI_PERF_PS2_WINDOW_VALID__GFX104PLUS = 0x00000037, - SPI_PERF_PS3_WINDOW_VALID__GFX104PLUS = 0x00000038, - SPI_PERF_PS0_BUSY__GFX104PLUS = 0x00000039, - SPI_PERF_PS1_BUSY__GFX104PLUS = 0x0000003a, - SPI_PERF_PS2_BUSY__GFX104PLUS = 0x0000003b, - SPI_PERF_PS3_BUSY__GFX104PLUS = 0x0000003c, - SPI_PERF_PS0_ACTIVE__GFX104PLUS = 0x0000003d, - SPI_PERF_PS1_ACTIVE__GFX104PLUS = 0x0000003e, - SPI_PERF_PS2_ACTIVE__GFX104PLUS = 0x0000003f, - SPI_PERF_PS3_ACTIVE__GFX104PLUS = 0x00000040, - SPI_PERF_PS0_DEALLOC__GFX104PLUS = 0x00000041, - SPI_PERF_PS1_DEALLOC__GFX104PLUS = 0x00000042, - SPI_PERF_PS2_DEALLOC__GFX104PLUS = 0x00000043, - SPI_PERF_PS3_DEALLOC__GFX104PLUS = 0x00000044, - SPI_PERF_RA_PIPE_REQ_BIN2__GFX104PLUS = 0x0000008d, - SPI_PERF_RA_TASK_REQ_BIN3__GFX104PLUS = 0x0000008e, - SPI_PERF_RA_WR_CTL_FULL__GFX104PLUS = 0x0000008f, - SPI_PERF_RA_REQ_NO_ALLOC__GFX104PLUS = 0x00000090, - SPI_PERF_RA_REQ_NO_ALLOC_PS__GFX104PLUS = 0x00000091, -#endif - SPI_PERF_GS_LAST_SUBGRP__GFX10CORE = 0x00000019, - SPI_PERF_GS_HS_DEALLOC__GFX10CORE = 0x0000001a, - SPI_PERF_GS_NGG_SE_LATE_ALLOC_LIMIT__GFX10CORE = 0x0000001b, - SPI_PERF_GS_GRP_FIFO_FULL__GFX10CORE = 0x0000001c, - SPI_PERF_GS_POS0_STALL__GFX10CORE = 0x0000001d, - SPI_PERF_GS_POS1_STALL__GFX10CORE = 0x0000001e, - SPI_PERF_GS_INDX0_STALL__GFX10CORE = 0x0000001f, - SPI_PERF_GS_INDX1_STALL__GFX10CORE = 0x00000020, - SPI_PERF_HS_WINDOW_VALID__GFX10CORE = 0x00000021, - SPI_PERF_HS_BUSY__GFX10CORE = 0x00000022, - 
SPI_PERF_HS_CRAWLER_STALL__GFX10CORE = 0x00000023, - SPI_PERF_HS_FIRST_WAVE__GFX10CORE = 0x00000024, - SPI_PERF_HS_LAST_WAVE__GFX10CORE = 0x00000025, - SPI_PERF_HS_LSHS_DEALLOC__GFX10CORE = 0x00000026, - SPI_PERF_HS_OFFCHIP_LDS_STALL__GFX10CORE = 0x00000027, - SPI_PERF_HS_EVENT_WAVE__GFX10CORE = 0x00000028, - SPI_PERF_HS_WAVE__GFX10CORE = 0x00000029, - SPI_PERF_HS_PERS_UPD_FULL0__GFX10CORE = 0x0000002a, - SPI_PERF_HS_PERS_UPD_FULL1__GFX10CORE = 0x0000002b, - SPI_PERF_CSGN_WINDOW_VALID__GFX10CORE = 0x0000002c, - SPI_PERF_CSGN_BUSY__GFX10CORE = 0x0000002d, - SPI_PERF_CSGN_NUM_THREADGROUPS__GFX10CORE = 0x0000002e, - SPI_PERF_CSGN_CRAWLER_STALL__GFX10CORE = 0x0000002f, - SPI_PERF_CSGN_EVENT_WAVE__GFX10CORE = 0x00000030, - SPI_PERF_CSGN_WAVE__GFX10CORE = 0x00000031, - SPI_PERF_CSN_WINDOW_VALID__GFX10CORE = 0x00000032, - SPI_PERF_CSN_BUSY__GFX10CORE = 0x00000033, - SPI_PERF_CSN_NUM_THREADGROUPS__GFX10CORE = 0x00000034, - SPI_PERF_CSN_CRAWLER_STALL__GFX10CORE = 0x00000035, - SPI_PERF_CSN_EVENT_WAVE__GFX10CORE = 0x00000036, - SPI_PERF_CSN_WAVE__GFX10CORE = 0x00000037, - SPI_PERF_PS0_WINDOW_VALID__GFX10CORE = 0x00000038, - SPI_PERF_PS1_WINDOW_VALID__GFX10CORE = 0x00000039, - SPI_PERF_PS2_WINDOW_VALID__GFX10CORE = 0x0000003a, - SPI_PERF_PS3_WINDOW_VALID__GFX10CORE = 0x0000003b, - SPI_PERF_PS0_BUSY__GFX10CORE = 0x0000003c, - SPI_PERF_PS1_BUSY__GFX10CORE = 0x0000003d, - SPI_PERF_PS2_BUSY__GFX10CORE = 0x0000003e, - SPI_PERF_PS3_BUSY__GFX10CORE = 0x0000003f, - SPI_PERF_PS0_ACTIVE__GFX10CORE = 0x00000040, - SPI_PERF_PS1_ACTIVE__GFX10CORE = 0x00000041, - SPI_PERF_PS2_ACTIVE__GFX10CORE = 0x00000042, - SPI_PERF_PS3_ACTIVE__GFX10CORE = 0x00000043, - SPI_PERF_PS0_DEALLOC__GFX10CORE = 0x00000044, - SPI_PERF_PS1_DEALLOC__GFX10CORE = 0x00000045, - SPI_PERF_PS2_DEALLOC__GFX10CORE = 0x00000046, - SPI_PERF_PS3_DEALLOC__GFX10CORE = 0x00000047, - SPI_PERF_PS0_FPOS_STALL__GFX10CORE = 0x00000048, - SPI_PERF_PS1_FPOS_STALL__GFX10CORE = 0x00000049, - SPI_PERF_PS2_FPOS_STALL__GFX10CORE = 
0x0000004a, - SPI_PERF_PS3_FPOS_STALL__GFX10CORE = 0x0000004b, - SPI_PERF_PS0_EVENT_WAVE__GFX10CORE = 0x0000004c, - SPI_PERF_PS1_EVENT_WAVE__GFX10CORE = 0x0000004d, - SPI_PERF_PS2_EVENT_WAVE__GFX10CORE = 0x0000004e, - SPI_PERF_PS3_EVENT_WAVE__GFX10CORE = 0x0000004f, - SPI_PERF_PS0_WAVE__GFX10CORE = 0x00000050, - SPI_PERF_PS1_WAVE__GFX10CORE = 0x00000051, - SPI_PERF_PS2_WAVE__GFX10CORE = 0x00000052, - SPI_PERF_PS3_WAVE__GFX10CORE = 0x00000053, - SPI_PERF_PS0_OPT_WAVE__GFX10CORE = 0x00000054, - SPI_PERF_PS1_OPT_WAVE__GFX10CORE = 0x00000055, - SPI_PERF_PS2_OPT_WAVE__GFX10CORE = 0x00000056, - SPI_PERF_PS3_OPT_WAVE__GFX10CORE = 0x00000057, - SPI_PERF_PS0_PASS_BIN0__GFX10CORE = 0x00000058, - SPI_PERF_PS1_PASS_BIN0__GFX10CORE = 0x00000059, - SPI_PERF_PS2_PASS_BIN0__GFX10CORE = 0x0000005a, - SPI_PERF_PS3_PASS_BIN0__GFX10CORE = 0x0000005b, - SPI_PERF_PS0_PASS_BIN1__GFX10CORE = 0x0000005c, - SPI_PERF_PS1_PASS_BIN1__GFX10CORE = 0x0000005d, - SPI_PERF_PS2_PASS_BIN1__GFX10CORE = 0x0000005e, - SPI_PERF_PS3_PASS_BIN1__GFX10CORE = 0x0000005f, - SPI_PERF_PS0_FPOS__GFX10CORE = 0x00000060, - SPI_PERF_PS1_FPOS__GFX10CORE = 0x00000061, - SPI_PERF_PS2_FPOS__GFX10CORE = 0x00000062, - SPI_PERF_PS3_FPOS__GFX10CORE = 0x00000063, - SPI_PERF_PS0_PRIM_BIN0__GFX10CORE = 0x00000064, - SPI_PERF_PS1_PRIM_BIN0__GFX10CORE = 0x00000065, - SPI_PERF_PS2_PRIM_BIN0__GFX10CORE = 0x00000066, - SPI_PERF_PS3_PRIM_BIN0__GFX10CORE = 0x00000067, - SPI_PERF_PS0_PRIM_BIN1__GFX10CORE = 0x00000068, - SPI_PERF_PS1_PRIM_BIN1__GFX10CORE = 0x00000069, - SPI_PERF_PS2_PRIM_BIN1__GFX10CORE = 0x0000006a, - SPI_PERF_PS3_PRIM_BIN1__GFX10CORE = 0x0000006b, - SPI_PERF_PS0_CNF_BIN2__GFX10CORE = 0x0000006c, - SPI_PERF_PS1_CNF_BIN2__GFX10CORE = 0x0000006d, - SPI_PERF_PS2_CNF_BIN2__GFX10CORE = 0x0000006e, - SPI_PERF_PS3_CNF_BIN2__GFX10CORE = 0x0000006f, - SPI_PERF_PS0_CNF_BIN3__GFX10CORE = 0x00000070, - SPI_PERF_PS1_CNF_BIN3__GFX10CORE = 0x00000071, - SPI_PERF_PS2_CNF_BIN3__GFX10CORE = 0x00000072, - 
SPI_PERF_PS3_CNF_BIN3__GFX10CORE = 0x00000073, - SPI_PERF_PS0_CRAWLER_STALL__GFX10CORE = 0x00000074, - SPI_PERF_PS1_CRAWLER_STALL__GFX10CORE = 0x00000075, - SPI_PERF_PS2_CRAWLER_STALL__GFX10CORE = 0x00000076, - SPI_PERF_PS3_CRAWLER_STALL__GFX10CORE = 0x00000077, - SPI_PERF_PS0_LDS_RES_FULL__GFX10CORE = 0x00000078, - SPI_PERF_PS1_LDS_RES_FULL__GFX10CORE = 0x00000079, - SPI_PERF_PS2_LDS_RES_FULL__GFX10CORE = 0x0000007a, - SPI_PERF_PS3_LDS_RES_FULL__GFX10CORE = 0x0000007b, - SPI_PERF_PS0_POPS_WAVE_SENT__GFX10CORE = 0x0000007c, - SPI_PERF_PS1_POPS_WAVE_SENT__GFX10CORE = 0x0000007d, - SPI_PERF_PS2_POPS_WAVE_SENT__GFX10CORE = 0x0000007e, - SPI_PERF_PS3_POPS_WAVE_SENT__GFX10CORE = 0x0000007f, - SPI_PERF_PS0_POPS_WAVE_EXIT__GFX10CORE = 0x00000080, - SPI_PERF_PS1_POPS_WAVE_EXIT__GFX10CORE = 0x00000081, - SPI_PERF_PS2_POPS_WAVE_EXIT__GFX10CORE = 0x00000082, - SPI_PERF_PS3_POPS_WAVE_EXIT__GFX10CORE = 0x00000083, - SPI_PERF_PS_PERS_UPD_FULL0__GFX10CORE = 0x00000084, - SPI_PERF_PS_PERS_UPD_FULL1__GFX10CORE = 0x00000085, - SPI_PERF_PS0_INTRA_PRIM_CNFLCT__GFX10CORE = 0x00000086, - SPI_PERF_PS1_INTRA_PRIM_CNFLCT__GFX10CORE = 0x00000087, - SPI_PERF_PS2_INTRA_PRIM_CNFLCT__GFX10CORE = 0x00000088, - SPI_PERF_PS3_INTRA_PRIM_CNFLCT__GFX10CORE = 0x00000089, - SPI_PERF_LDS0_PC_VALID__GFX10CORE = 0x0000008a, - SPI_PERF_LDS1_PC_VALID__GFX10CORE = 0x0000008b, - SPI_PERF_RA_PIPE_REQ_BIN2__GFX10CORE = 0x0000008c, - SPI_PERF_RA_TASK_REQ_BIN3__GFX10CORE = 0x0000008d, - SPI_PERF_RA_WR_CTL_FULL__GFX10CORE = 0x0000008e, - SPI_PERF_RA_REQ_NO_ALLOC__GFX10CORE = 0x0000008f, - SPI_PERF_RA_REQ_NO_ALLOC_PS__GFX10CORE = 0x00000090, - SPI_PERF_RA_REQ_NO_ALLOC_VS__GFX10CORE = 0x00000091, - SPI_PERF_RA_REQ_NO_ALLOC_HS__GFX10CORE = 0x00000095, - SPI_PERF_RA_REQ_NO_ALLOC_CSG__GFX10CORE = 0x00000096, - SPI_PERF_RA_REQ_NO_ALLOC_CSN__GFX10CORE = 0x00000097, - SPI_PERF_RA_RES_STALL_PS__GFX10CORE = 0x00000098, - SPI_PERF_RA_RES_STALL_VS__GFX10CORE = 0x00000099, - SPI_PERF_RA_RES_STALL_GS__GFX10CORE = 0x0000009b, - 
SPI_PERF_RA_RES_STALL_HS__GFX10CORE = 0x0000009d, - SPI_PERF_RA_RES_STALL_CSG__GFX10CORE = 0x0000009e, - SPI_PERF_RA_RES_STALL_CSN__GFX10CORE = 0x0000009f, - SPI_PERF_RA_TMP_STALL_PS__GFX10CORE = 0x000000a0, - SPI_PERF_RA_TMP_STALL_VS__GFX10CORE = 0x000000a1, - SPI_PERF_RA_TMP_STALL_GS__GFX10CORE = 0x000000a3, - SPI_PERF_RA_TMP_STALL_HS__GFX10CORE = 0x000000a5, - SPI_PERF_RA_TMP_STALL_CSG__GFX10CORE = 0x000000a6, - SPI_PERF_RA_TMP_STALL_CSN__GFX10CORE = 0x000000a7, - SPI_PERF_RA_WAVE_SIMD_FULL_PS__GFX10CORE = 0x000000a8, - SPI_PERF_RA_WAVE_SIMD_FULL_VS__GFX10CORE = 0x000000a9, - SPI_PERF_RA_WAVE_SIMD_FULL_GS__GFX10CORE = 0x000000ab, - SPI_PERF_RA_WAVE_SIMD_FULL_HS__GFX10CORE = 0x000000ad, - SPI_PERF_RA_WAVE_SIMD_FULL_CSG__GFX10CORE = 0x000000ae, - SPI_PERF_RA_WAVE_SIMD_FULL_CSN__GFX10CORE = 0x000000af, - SPI_PERF_RA_VGPR_SIMD_FULL_PS__GFX10CORE = 0x000000b0, - SPI_PERF_RA_VGPR_SIMD_FULL_VS__GFX10CORE = 0x000000b1, - SPI_PERF_RA_VGPR_SIMD_FULL_GS__GFX10CORE = 0x000000b3, - SPI_PERF_RA_VGPR_SIMD_FULL_HS__GFX10CORE = 0x000000b4, - SPI_PERF_RA_VGPR_SIMD_FULL_CSG__GFX10CORE = 0x000000b6, - SPI_PERF_RA_VGPR_SIMD_FULL_CSN__GFX10CORE = 0x000000b7, - SPI_PERF_RA_LDS_CU_FULL_PS__GFX10CORE = 0x000000c0, - SPI_PERF_RA_LDS_CU_FULL_HS__GFX10CORE = 0x000000c2, - SPI_PERF_RA_LDS_CU_FULL_GS__GFX10CORE = 0x000000c4, - SPI_PERF_RA_LDS_CU_FULL_CSG__GFX10CORE = 0x000000c5, - SPI_PERF_RA_LDS_CU_FULL_CSN__GFX10CORE = 0x000000c6, - SPI_PERF_RA_BAR_CU_FULL_HS__GFX10CORE = 0x000000c7, - SPI_PERF_RA_BAR_CU_FULL_CSG__GFX10CORE = 0x000000c8, - SPI_PERF_RA_BAR_CU_FULL_CSN__GFX10CORE = 0x000000c9, - SPI_PERF_RA_BULKY_CU_FULL_CSG__GFX10CORE = 0x000000ca, - SPI_PERF_RA_BULKY_CU_FULL_CSN__GFX10CORE = 0x000000cb, - SPI_PERF_RA_TGLIM_CU_FULL_CSG__GFX10CORE = 0x000000cc, - SPI_PERF_RA_TGLIM_CU_FULL_CSN__GFX10CORE = 0x000000cd, - SPI_PERF_RA_WVLIM_STALL_PS__GFX10CORE = 0x000000ce, - SPI_PERF_RA_WVLIM_STALL_VS__GFX10CORE = 0x000000cf, - SPI_PERF_RA_WVLIM_STALL_GS__GFX10CORE = 0x000000d1, - 
SPI_PERF_RA_WVLIM_STALL_HS__GFX10CORE = 0x000000d3, - SPI_PERF_RA_WVLIM_STALL_CSG__GFX10CORE = 0x000000d4, - SPI_PERF_RA_WVLIM_STALL_CSN__GFX10CORE = 0x000000d5, - SPI_PERF_RA_VS_LOCK__GFX10CORE = 0x000000d6, - SPI_PERF_RA_PS_LOCK__GFX10CORE = 0x000000d7, - SPI_PERF_RA_GS_LOCK__GFX10CORE = 0x000000d9, - SPI_PERF_RA_HS_LOCK__GFX10CORE = 0x000000db, - SPI_PERF_RA_CSG_LOCK__GFX10CORE = 0x000000dc, - SPI_PERF_RA_CSN_LOCK__GFX10CORE = 0x000000dd, - SPI_PERF_RA_RSV_UPD__GFX10CORE = 0x000000de, - SPI_PERF_RA_PRE_ALLOC_STALL__GFX10CORE = 0x000000df, - SPI_PERF_RA_GFX_UNDER_TUNNEL__GFX10CORE = 0x000000e0, - SPI_PERF_RA_CSC_UNDER_TUNNEL__GFX10CORE = 0x000000e1, - SPI_PERF_RA_WVALLOC_STALL__GFX10CORE = 0x000000e2, - SPI_PERF_RA_ACCUM0_SIMD_FULL_PS__GFX10CORE = 0x000000e3, - SPI_PERF_RA_ACCUM1_SIMD_FULL_PS__GFX10CORE = 0x000000e4, - SPI_PERF_RA_ACCUM2_SIMD_FULL_PS__GFX10CORE = 0x000000e5, - SPI_PERF_RA_ACCUM3_SIMD_FULL_PS__GFX10CORE = 0x000000e6, - SPI_PERF_RA_ACCUM0_SIMD_FULL_VS__GFX10CORE = 0x000000e7, - SPI_PERF_RA_ACCUM1_SIMD_FULL_VS__GFX10CORE = 0x000000e8, - SPI_PERF_RA_ACCUM2_SIMD_FULL_VS__GFX10CORE = 0x000000e9, - SPI_PERF_RA_ACCUM3_SIMD_FULL_VS__GFX10CORE = 0x000000ea, - SPI_PERF_RA_ACCUM0_SIMD_FULL_GS__GFX10CORE = 0x000000eb, - SPI_PERF_RA_ACCUM1_SIMD_FULL_GS__GFX10CORE = 0x000000ec, - SPI_PERF_RA_ACCUM2_SIMD_FULL_GS__GFX10CORE = 0x000000ed, - SPI_PERF_RA_ACCUM3_SIMD_FULL_GS__GFX10CORE = 0x000000ee, - SPI_PERF_RA_ACCUM0_SIMD_FULL_HS__GFX10CORE = 0x000000ef, - SPI_PERF_RA_ACCUM1_SIMD_FULL_HS__GFX10CORE = 0x000000f0, - SPI_PERF_RA_ACCUM2_SIMD_FULL_HS__GFX10CORE = 0x000000f1, - SPI_PERF_RA_ACCUM3_SIMD_FULL_HS__GFX10CORE = 0x000000f2, - SPI_PERF_RA_ACCUM0_SIMD_FULL_CSG__GFX10CORE = 0x000000f3, - SPI_PERF_RA_ACCUM1_SIMD_FULL_CSG__GFX10CORE = 0x000000f4, - SPI_PERF_RA_ACCUM2_SIMD_FULL_CSG__GFX10CORE = 0x000000f5, - SPI_PERF_RA_ACCUM3_SIMD_FULL_CSG__GFX10CORE = 0x000000f6, - SPI_PERF_RA_ACCUM0_SIMD_FULL_CSN__GFX10CORE = 0x000000f7, - 
SPI_PERF_RA_ACCUM1_SIMD_FULL_CSN__GFX10CORE = 0x000000f8, - SPI_PERF_RA_ACCUM2_SIMD_FULL_CSN__GFX10CORE = 0x000000f9, - SPI_PERF_RA_ACCUM3_SIMD_FULL_CSN__GFX10CORE = 0x000000fa, - SPI_PERF_EXP_ARB_COL_CNT__GFX10CORE = 0x000000fb, - SPI_PERF_EXP_ARB_PAR_CNT__GFX10CORE = 0x000000fc, - SPI_PERF_EXP_ARB_POS_CNT__GFX10CORE = 0x000000fd, - SPI_PERF_EXP_ARB_GDS_CNT__GFX10CORE = 0x000000fe, - SPI_PERF_NUM_PS_COL_SA0SQ0_EXPORTS__GFX10CORE = 0x000000ff, - SPI_PERF_NUM_PS_COL_SA0SQ1_EXPORTS__GFX10CORE = 0x00000100, - SPI_PERF_NUM_PS_COL_SA1SQ0_EXPORTS__GFX10CORE = 0x00000101, - SPI_PERF_NUM_PS_COL_SA1SQ1_EXPORTS__GFX10CORE = 0x00000102, - SPI_PERF_NUM_VS_POS_SA0SQ0_EXPORTS__GFX10CORE = 0x00000103, - SPI_PERF_NUM_VS_POS_SA0SQ1_EXPORTS__GFX10CORE = 0x00000104, - SPI_PERF_NUM_VS_POS_SA1SQ0_EXPORTS__GFX10CORE = 0x00000105, - SPI_PERF_NUM_VS_POS_SA1SQ1_EXPORTS__GFX10CORE = 0x00000106, - SPI_PERF_NUM_VS_PARAM_SA0SQ0_EXPORTS__GFX10CORE = 0x00000107, - SPI_PERF_NUM_VS_PARAM_SA0SQ1_EXPORTS__GFX10CORE = 0x00000108, - SPI_PERF_NUM_VS_PARAM_SA1SQ0_EXPORTS__GFX10CORE = 0x00000109, - SPI_PERF_NUM_VS_PARAM_SA1SQ1_EXPORTS__GFX10CORE = 0x0000010a, - SPI_PERF_NUM_VS_GDS_SA0SQ0_EXPORTS__GFX10CORE = 0x0000010b, - SPI_PERF_NUM_VS_GDS_SA0SQ1_EXPORTS__GFX10CORE = 0x0000010c, - SPI_PERF_NUM_VS_GDS_SA1SQ0_EXPORTS__GFX10CORE = 0x0000010d, - SPI_PERF_NUM_VS_GDS_SA1SQ1_EXPORTS__GFX10CORE = 0x0000010e, - SPI_PERF_NUM_EXPGRANT_EXPORTS__GFX10CORE = 0x0000010f, - SPI_PERF_CLKGATE_BUSY_STALL__GFX10CORE = 0x00000110, - SPI_PERF_CLKGATE_ACTIVE_STALL__GFX10CORE = 0x00000111, - SPI_PERF_CLKGATE_ALL_CLOCKS_ON__GFX10CORE = 0x00000112, - SPI_PERF_CLKGATE_CGTT_DYN_ON__GFX10CORE = 0x00000113, - SPI_PERF_CLKGATE_CGTT_REG_ON__GFX10CORE = 0x00000114, - SPI_PERF_PIX_ALLOC_PEND_CNT__GFX10CORE = 0x00000115, - SPI_PERF_PIX_ALLOC_SCB0_STALL__GFX10CORE = 0x00000116, - SPI_PERF_PIX_ALLOC_SCB1_STALL__GFX10CORE = 0x00000117, - SPI_PERF_PIX_ALLOC_SCB2_STALL__GFX10CORE = 0x00000118, - SPI_PERF_PIX_ALLOC_SCB3_STALL__GFX10CORE = 
0x00000119, - SPI_PERF_PIX_ALLOC_DB0_STALL__GFX10CORE = 0x0000011a, - SPI_PERF_PIX_ALLOC_DB1_STALL__GFX10CORE = 0x0000011b, - SPI_PERF_PIX_ALLOC_DB2_STALL__GFX10CORE = 0x0000011c, - SPI_PERF_PIX_ALLOC_DB3_STALL__GFX10CORE = 0x0000011d, - SPI_PERF_PIX_ALLOC_DB4_STALL__GFX10CORE = 0x0000011e, - SPI_PERF_PIX_ALLOC_DB5_STALL__GFX10CORE = 0x0000011f, - SPI_PERF_PIX_ALLOC_DB6_STALL__GFX10CORE = 0x00000120, - SPI_PERF_PIX_ALLOC_DB7_STALL__GFX10CORE = 0x00000121, - SPI_PERF_GS_NGG_SE_SEND_GS_ALLOC__GFX10CORE = 0x00000122, - SPI_PERF_SWC_GS_WR__GFX10CORE = 0x00000126, - SPI_PERF_SWC_HS_WR__GFX10CORE = 0x00000128, - SPI_PERF_SWC_CSGN_WR__GFX10CORE = 0x00000129, - SPI_PERF_SWC_CSN_WR__GFX10CORE = 0x0000012a, - SPI_PERF_VWC_PS_WR__GFX10CORE = 0x0000012b, - SPI_PERF_VWC_VS_WR__GFX10CORE = 0x0000012c, - SPI_PERF_VWC_GS_WR__GFX10CORE = 0x0000012e, - SPI_PERF_VWC_HS_WR__GFX10CORE = 0x00000130, - SPI_PERF_VWC_CSGN_WR__GFX10CORE = 0x00000131, - SPI_PERF_VWC_CSN_WR__GFX10CORE = 0x00000132, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SPI_PERF_GS_WINDOW_VALID__GFX11 = 0x00000001, - SPI_PERF_GS_BUSY__GFX11 = 0x00000002, - SPI_PERF_GS_CRAWLER_STALL__GFX11 = 0x00000003, - SPI_PERF_GS_EVENT_WAVE__GFX11 = 0x00000004, - SPI_PERF_GS_WAVE__GFX11 = 0x00000005, - SPI_PERF_GS_PERS_UPD_FULL0__GFX11 = 0x00000006, - SPI_PERF_GS_PERS_UPD_FULL1__GFX11 = 0x00000007, - SPI_PERF_GS_FIRST_SUBGRP__GFX11 = 0x00000008, - SPI_PERF_GS_HS_DEALLOC__GFX11 = 0x00000009, - SPI_PERF_GS_NGG_SE_LATE_ALLOC_LIMIT__GFX11 = 0x0000000a, - SPI_PERF_GS_POS0_STALL__GFX11 = 0x0000000b, - SPI_PERF_GS_POS1_STALL__GFX11 = 0x0000000c, - SPI_PERF_GS_INDX0_STALL__GFX11 = 0x0000000d, - SPI_PERF_GS_INDX1_STALL__GFX11 = 0x0000000e, - SPI_PERF_GS_PWS_STALL__GFX11 = 0x0000000f, - SPI_PERF_GS_GRP_LIFETIME__GFX11 = 0x00000010, - SPI_PERF_GS_WAVE_IN_FLIGHT__GFX11 = 0x00000011, - SPI_PERF_HS_WINDOW_VALID__GFX11 = 0x00000015, - SPI_PERF_HS_BUSY__GFX11 = 0x00000016, - SPI_PERF_HS_CRAWLER_STALL__GFX11 = 
0x00000017, - SPI_PERF_HS_FIRST_WAVE__GFX11 = 0x00000018, - SPI_PERF_HS_OFFCHIP_LDS_STALL__GFX11 = 0x00000019, - SPI_PERF_HS_EVENT_WAVE__GFX11 = 0x0000001a, - SPI_PERF_HS_WAVE__GFX11 = 0x0000001b, - SPI_PERF_HS_PERS_UPD_FULL0__GFX11 = 0x0000001c, - SPI_PERF_HS_PERS_UPD_FULL1__GFX11 = 0x0000001d, - SPI_PERF_HS_PWS_STALL__GFX11 = 0x0000001e, - SPI_PERF_HS_WAVE_IN_FLIGHT__GFX11 = 0x0000001f, - SPI_PERF_CSGN_WINDOW_VALID__GFX11 = 0x00000025, - SPI_PERF_CSGN_BUSY__GFX11 = 0x00000026, - SPI_PERF_CSGN_NUM_THREADGROUPS__GFX11 = 0x00000027, - SPI_PERF_CSGN_CRAWLER_STALL__GFX11 = 0x00000028, - SPI_PERF_CSGN_EVENT_WAVE__GFX11 = 0x00000029, - SPI_PERF_CSGN_WAVE__GFX11 = 0x0000002a, - SPI_PERF_CSGN_PWS_STALL__GFX11 = 0x0000002b, - SPI_PERF_CSGN_WAVE_IN_FLIGHT__GFX11 = 0x0000002c, - SPI_PERF_CSN_WINDOW_VALID__GFX11 = 0x0000002d, - SPI_PERF_CSN_BUSY__GFX11 = 0x0000002e, - SPI_PERF_CSN_NUM_THREADGROUPS__GFX11 = 0x0000002f, - SPI_PERF_CSN_CRAWLER_STALL__GFX11 = 0x00000030, - SPI_PERF_CSN_EVENT_WAVE__GFX11 = 0x00000031, - SPI_PERF_CSN_WAVE__GFX11 = 0x00000032, - SPI_PERF_CSN_WAVE_IN_FLIGHT__GFX11 = 0x00000033, - SPI_PERF_PS0_EVENT_WAVE__GFX11 = 0x00000045, - SPI_PERF_PS1_EVENT_WAVE__GFX11 = 0x00000046, - SPI_PERF_PS2_EVENT_WAVE__GFX11 = 0x00000047, - SPI_PERF_PS3_EVENT_WAVE__GFX11 = 0x00000048, - SPI_PERF_PS0_WAVE__GFX11 = 0x00000049, - SPI_PERF_PS1_WAVE__GFX11 = 0x0000004a, - SPI_PERF_PS2_WAVE__GFX11 = 0x0000004b, - SPI_PERF_PS3_WAVE__GFX11 = 0x0000004c, - SPI_PERF_PS0_OPT_WAVE__GFX11 = 0x0000004d, - SPI_PERF_PS1_OPT_WAVE__GFX11 = 0x0000004e, - SPI_PERF_PS2_OPT_WAVE__GFX11 = 0x0000004f, - SPI_PERF_PS3_OPT_WAVE__GFX11 = 0x00000050, - SPI_PERF_PS0_PRIM_BIN0__GFX11 = 0x00000051, - SPI_PERF_PS1_PRIM_BIN0__GFX11 = 0x00000052, - SPI_PERF_PS2_PRIM_BIN0__GFX11 = 0x00000053, - SPI_PERF_PS3_PRIM_BIN0__GFX11 = 0x00000054, - SPI_PERF_PS0_PRIM_BIN1__GFX11 = 0x00000055, - SPI_PERF_PS1_PRIM_BIN1__GFX11 = 0x00000056, - SPI_PERF_PS2_PRIM_BIN1__GFX11 = 0x00000057, - SPI_PERF_PS3_PRIM_BIN1__GFX11 = 
0x00000058, - SPI_PERF_PS0_CRAWLER_STALL__GFX11 = 0x00000059, - SPI_PERF_PS1_CRAWLER_STALL__GFX11 = 0x0000005a, - SPI_PERF_PS2_CRAWLER_STALL__GFX11 = 0x0000005b, - SPI_PERF_PS3_CRAWLER_STALL__GFX11 = 0x0000005c, - SPI_PERF_PS_PERS_UPD_FULL0__GFX11 = 0x0000005d, - SPI_PERF_PS_PERS_UPD_FULL1__GFX11 = 0x0000005e, - SPI_PERF_PS0_2_WAVE_GROUPS__GFX11 = 0x0000005f, - SPI_PERF_PS1_2_WAVE_GROUPS__GFX11 = 0x00000060, - SPI_PERF_PS2_2_WAVE_GROUPS__GFX11 = 0x00000061, - SPI_PERF_PS3_2_WAVE_GROUPS__GFX11 = 0x00000062, - SPI_PERF_PS0_WAVE_GROUP_CLOCK_DELAY__GFX11 = 0x00000063, - SPI_PERF_PS1_WAVE_GROUP_CLOCK_DELAY__GFX11 = 0x00000064, - SPI_PERF_PS2_WAVE_GROUP_CLOCK_DELAY__GFX11 = 0x00000065, - SPI_PERF_PS3_WAVE_GROUP_CLOCK_DELAY__GFX11 = 0x00000066, - SPI_PERF_PS0_WAVE_GROUP_TIMEOUTS__GFX11 = 0x00000067, - SPI_PERF_PS1_WAVE_GROUP_TIMEOUTS__GFX11 = 0x00000068, - SPI_PERF_PS2_WAVE_GROUP_TIMEOUTS__GFX11 = 0x00000069, - SPI_PERF_PS3_WAVE_GROUP_TIMEOUTS__GFX11 = 0x0000006a, - SPI_PERF_PS_PWS_STALL__GFX11 = 0x0000006b, - SPI_PERF_PS0_LDS_DONE_FULL__GFX11 = 0x0000006c, - SPI_PERF_PS1_LDS_DONE_FULL__GFX11 = 0x0000006d, - SPI_PERF_PS2_LDS_DONE_FULL__GFX11 = 0x0000006e, - SPI_PERF_PS3_LDS_DONE_FULL__GFX11 = 0x0000006f, - SPI_PERF_PS0_DEALLOC_FULL__GFX11 = 0x00000070, - SPI_PERF_PS1_DEALLOC_FULL__GFX11 = 0x00000071, - SPI_PERF_PS2_DEALLOC_FULL__GFX11 = 0x00000072, - SPI_PERF_PS3_DEALLOC_FULL__GFX11 = 0x00000073, - SPI_PERF_PS0_WAVE_IN_FLIGHT__GFX11 = 0x00000074, - SPI_PERF_PS1_WAVE_IN_FLIGHT__GFX11 = 0x00000075, - SPI_PERF_PS2_WAVE_IN_FLIGHT__GFX11 = 0x00000076, - SPI_PERF_PS3_WAVE_IN_FLIGHT__GFX11 = 0x00000077, - SPI_PERF_RA_GS_LDS_OCCUPANCY__GFX11 = 0x00000085, - SPI_PERF_RA_GS_VGPR_OCCUPANCY__GFX11 = 0x00000086, - SPI_PERF_RA_PS_LDS_OCCUPANCY__GFX11 = 0x00000087, - SPI_PERF_RA_PS_VGPR_OCCUPANCY__GFX11 = 0x00000088, - SPI_PERF_RA_SPI_THROTTLE__GFX11 = 0x00000089, - SPI_PERF_RA_PH_THROTTLE__GFX11 = 0x0000008a, - SPI_PERF_RA_PC_PROBE_STALL_PS__GFX11 = 0x0000008b, - 
SPI_PERF_RA_PC_PSWAVE_STALL_PS__GFX11 = 0x0000008c, - SPI_PERF_RA_REQ_NO_ALLOC_GS__GFX11 = 0x00000092, - SPI_PERF_RA_REQ_NO_ALLOC_HS__GFX11 = 0x00000093, - SPI_PERF_RA_REQ_NO_ALLOC_CSG__GFX11 = 0x00000094, - SPI_PERF_RA_REQ_NO_ALLOC_CSN__GFX11 = 0x00000095, - SPI_PERF_RA_RES_STALL_PS__GFX11 = 0x00000096, - SPI_PERF_RA_RES_STALL_GS__GFX11 = 0x00000097, - SPI_PERF_RA_RES_STALL_HS__GFX11 = 0x00000098, - SPI_PERF_RA_RES_STALL_CSG__GFX11 = 0x00000099, - SPI_PERF_RA_RES_STALL_CSN__GFX11 = 0x0000009a, - SPI_PERF_RA_TMP_STALL_PS__GFX11 = 0x0000009b, - SPI_PERF_RA_TMP_STALL_GS__GFX11 = 0x0000009c, - SPI_PERF_RA_TMP_STALL_HS__GFX11 = 0x0000009d, - SPI_PERF_RA_TMP_STALL_CSG__GFX11 = 0x0000009e, - SPI_PERF_RA_TMP_STALL_CSN__GFX11 = 0x0000009f, - SPI_PERF_RA_WAVE_SIMD_FULL_PS__GFX11 = 0x000000a0, - SPI_PERF_RA_WAVE_SIMD_FULL_GS__GFX11 = 0x000000a1, - SPI_PERF_RA_WAVE_SIMD_FULL_HS__GFX11 = 0x000000a2, - SPI_PERF_RA_WAVE_SIMD_FULL_CSG__GFX11 = 0x000000a3, - SPI_PERF_RA_WAVE_SIMD_FULL_CSN__GFX11 = 0x000000a4, - SPI_PERF_RA_VGPR_SIMD_FULL_PS__GFX11 = 0x000000a5, - SPI_PERF_RA_VGPR_SIMD_FULL_GS__GFX11 = 0x000000a6, - SPI_PERF_RA_VGPR_SIMD_FULL_HS__GFX11 = 0x000000a7, - SPI_PERF_RA_VGPR_SIMD_FULL_CSG__GFX11 = 0x000000a8, - SPI_PERF_RA_VGPR_SIMD_FULL_CSN__GFX11 = 0x000000a9, - SPI_PERF_RA_LDS_CU_FULL_PS__GFX11 = 0x000000aa, - SPI_PERF_RA_LDS_CU_FULL_HS__GFX11 = 0x000000ab, - SPI_PERF_RA_LDS_CU_FULL_GS__GFX11 = 0x000000ac, - SPI_PERF_RA_LDS_CU_FULL_CSG__GFX11 = 0x000000ad, - SPI_PERF_RA_LDS_CU_FULL_CSN__GFX11 = 0x000000ae, - SPI_PERF_RA_BAR_CU_FULL_PS__GFX11 = 0x000000af, - SPI_PERF_RA_BAR_CU_FULL_GS__GFX11 = 0x000000b0, - SPI_PERF_RA_BAR_CU_FULL_HS__GFX11 = 0x000000b1, - SPI_PERF_RA_BAR_CU_FULL_CSG__GFX11 = 0x000000b2, - SPI_PERF_RA_BAR_CU_FULL_CSN__GFX11 = 0x000000b3, - SPI_PERF_RA_BULKY_CU_FULL_CSG__GFX11 = 0x000000b4, - SPI_PERF_RA_BULKY_CU_FULL_CSN__GFX11 = 0x000000b5, - SPI_PERF_RA_TGLIM_CU_FULL_CSG__GFX11 = 0x000000b6, - SPI_PERF_RA_TGLIM_CU_FULL_CSN__GFX11 = 0x000000b7, - 
SPI_PERF_RA_WVLIM_STALL_PS__GFX11 = 0x000000b8, - SPI_PERF_RA_WVLIM_STALL_GS__GFX11 = 0x000000b9, - SPI_PERF_RA_WVLIM_STALL_HS__GFX11 = 0x000000ba, - SPI_PERF_RA_WVLIM_STALL_CSG__GFX11 = 0x000000bb, - SPI_PERF_RA_WVLIM_STALL_CSN__GFX11 = 0x000000bc, - SPI_PERF_RA_GS_LOCK__GFX11 = 0x000000bd, - SPI_PERF_RA_HS_LOCK__GFX11 = 0x000000be, - SPI_PERF_RA_CSG_LOCK__GFX11 = 0x000000bf, - SPI_PERF_RA_CSN_LOCK__GFX11 = 0x000000c0, - SPI_PERF_RA_RSV_UPD__GFX11 = 0x000000c1, - SPI_PERF_RA_PRE_ALLOC_STALL__GFX11 = 0x000000c2, - SPI_PERF_RA_GFX_UNDER_TUNNEL__GFX11 = 0x000000c3, - SPI_PERF_RA_CSC_UNDER_TUNNEL__GFX11 = 0x000000c4, - SPI_PERF_RA_WVALLOC_STALL__GFX11 = 0x000000c5, - SPI_PERF_RA_ACCUM0_SIMD_FULL_PS__GFX11 = 0x000000c6, - SPI_PERF_RA_ACCUM1_SIMD_FULL_PS__GFX11 = 0x000000c7, - SPI_PERF_RA_ACCUM2_SIMD_FULL_PS__GFX11 = 0x000000c8, - SPI_PERF_RA_ACCUM3_SIMD_FULL_PS__GFX11 = 0x000000c9, - SPI_PERF_RA_ACCUM0_SIMD_FULL_GS__GFX11 = 0x000000ca, - SPI_PERF_RA_ACCUM1_SIMD_FULL_GS__GFX11 = 0x000000cb, - SPI_PERF_RA_ACCUM2_SIMD_FULL_GS__GFX11 = 0x000000cc, - SPI_PERF_RA_ACCUM3_SIMD_FULL_GS__GFX11 = 0x000000cd, - SPI_PERF_RA_ACCUM0_SIMD_FULL_HS__GFX11 = 0x000000ce, - SPI_PERF_RA_ACCUM1_SIMD_FULL_HS__GFX11 = 0x000000cf, - SPI_PERF_RA_ACCUM2_SIMD_FULL_HS__GFX11 = 0x000000d0, - SPI_PERF_RA_ACCUM3_SIMD_FULL_HS__GFX11 = 0x000000d1, - SPI_PERF_RA_ACCUM0_SIMD_FULL_CSG__GFX11 = 0x000000d2, - SPI_PERF_RA_ACCUM1_SIMD_FULL_CSG__GFX11 = 0x000000d3, - SPI_PERF_RA_ACCUM2_SIMD_FULL_CSG__GFX11 = 0x000000d4, - SPI_PERF_RA_ACCUM3_SIMD_FULL_CSG__GFX11 = 0x000000d5, - SPI_PERF_RA_ACCUM0_SIMD_FULL_CSN__GFX11 = 0x000000d6, - SPI_PERF_RA_ACCUM1_SIMD_FULL_CSN__GFX11 = 0x000000d7, - SPI_PERF_RA_ACCUM2_SIMD_FULL_CSN__GFX11 = 0x000000d8, - SPI_PERF_RA_ACCUM3_SIMD_FULL_CSN__GFX11 = 0x000000d9, - SPI_PERF_EXP_ARB_COL_CNT__GFX11 = 0x000000da, - SPI_PERF_EXP_ARB_POS_CNT__GFX11 = 0x000000db, - SPI_PERF_EXP_ARB_GDS_CNT__GFX11 = 0x000000dc, - SPI_PERF_EXP_ARB_IDX_CNT__GFX11 = 0x000000dd, - 
SPI_PERF_EXP_WITH_CONFLICT__GFX11 = 0x000000de, - SPI_PERF_EXP_WITH_CONFLICT_CLEAR__GFX11 = 0x000000df, - SPI_PERF_GS_EXP_DONE__GFX11 = 0x000000e0, - SPI_PERF_PS_EXP_DONE__GFX11 = 0x000000e1, - SPI_PERF_PS_EXP_ARB_CONFLICT__GFX11 = 0x000000e2, - SPI_PERF_GS_SCBD_IDX_CLEANUP__GFX11 = 0x000000e3, - SPI_PERF_GS_SCBD_POS_CLEANUP__GFX11 = 0x000000e4, - SPI_PERF_PS_EXP_ALLOC__GFX11 = 0x000000e5, - SPI_PERF_PS0_WAVEID_STARVED__GFX11 = 0x000000e6, - SPI_PERF_PS1_WAVEID_STARVED__GFX11 = 0x000000e7, - SPI_PERF_PS2_WAVEID_STARVED__GFX11 = 0x000000e8, - SPI_PERF_PS3_WAVEID_STARVED__GFX11 = 0x000000e9, - SPI_PERF_PS0_EXP_ALLOC_WITH_CONFLICT__GFX11 = 0x000000ea, - SPI_PERF_PS1_EXP_ALLOC_WITH_CONFLICT__GFX11 = 0x000000eb, - SPI_PERF_PS2_EXP_ALLOC_WITH_CONFLICT__GFX11 = 0x000000ec, - SPI_PERF_PS3_EXP_ALLOC_WITH_CONFLICT__GFX11 = 0x000000ed, - SPI_PERF_NUM_PS_COL_SA0SQ0_EXPORTS__GFX11 = 0x000000ee, - SPI_PERF_NUM_PS_COL_SA0SQ1_EXPORTS__GFX11 = 0x000000ef, - SPI_PERF_NUM_PS_COL_SA1SQ0_EXPORTS__GFX11 = 0x000000f0, - SPI_PERF_NUM_PS_COL_SA1SQ1_EXPORTS__GFX11 = 0x000000f1, - SPI_PERF_NUM_POS_SA0SQ0_EXPORTS__GFX11 = 0x000000f2, - SPI_PERF_NUM_POS_SA0SQ1_EXPORTS__GFX11 = 0x000000f3, - SPI_PERF_NUM_POS_SA1SQ0_EXPORTS__GFX11 = 0x000000f4, - SPI_PERF_NUM_POS_SA1SQ1_EXPORTS__GFX11 = 0x000000f5, - SPI_PERF_NUM_GDS_SA0SQ0_EXPORTS__GFX11 = 0x000000f6, - SPI_PERF_NUM_GDS_SA0SQ1_EXPORTS__GFX11 = 0x000000f7, - SPI_PERF_NUM_GDS_SA1SQ0_EXPORTS__GFX11 = 0x000000f8, - SPI_PERF_NUM_GDS_SA1SQ1_EXPORTS__GFX11 = 0x000000f9, - SPI_PERF_NUM_EXPGRANT_EXPORTS__GFX11 = 0x000000fa, - SPI_PERF_GS_ALLOC_IDX__GFX11 = 0x000000fb, - SPI_PERF_GS_ALLOC_POS__GFX11 = 0x000000fc, - SPI_PERF_PIX_ALLOC_PEND_CNT__GFX11 = 0x000000fd, - SPI_PERF_EXPORT_SCB0_STALL__GFX11 = 0x000000fe, - SPI_PERF_EXPORT_SCB1_STALL__GFX11 = 0x000000ff, - SPI_PERF_EXPORT_SCB2_STALL__GFX11 = 0x00000100, - SPI_PERF_EXPORT_SCB3_STALL__GFX11 = 0x00000101, - SPI_PERF_EXPORT_DB0_STALL__GFX11 = 0x00000102, - SPI_PERF_EXPORT_DB1_STALL__GFX11 = 
0x00000103, - SPI_PERF_EXPORT_DB2_STALL__GFX11 = 0x00000104, - SPI_PERF_EXPORT_DB3_STALL__GFX11 = 0x00000105, - SPI_PERF_EXPORT_DB4_STALL__GFX11 = 0x00000106, - SPI_PERF_EXPORT_DB5_STALL__GFX11 = 0x00000107, - SPI_PERF_EXPORT_DB6_STALL__GFX11 = 0x00000108, - SPI_PERF_EXPORT_DB7_STALL__GFX11 = 0x00000109, - SPI_PERF_GS_NGG_SE_SEND_GS_ALLOC__GFX11 = 0x0000010a, - SPI_PERF_GS_NGG_STALL_MSG_VAL__GFX11 = 0x0000010b, - SPI_PERF_SWC_PS_WR__GFX11 = 0x0000010c, - SPI_PERF_SWC_GS_WR__GFX11 = 0x0000010d, - SPI_PERF_SWC_HS_WR__GFX11 = 0x0000010e, - SPI_PERF_SWC_CSGN_WR__GFX11 = 0x0000010f, - SPI_PERF_SWC_CSN_WR__GFX11 = 0x00000110, - SPI_PERF_VWC_PS_WR__GFX11 = 0x00000111, - SPI_PERF_VWC_ES_WR__GFX11 = 0x00000112, - SPI_PERF_VWC_GS_WR__GFX11 = 0x00000113, - SPI_PERF_VWC_LS_WR__GFX11 = 0x00000114, - SPI_PERF_VWC_HS_WR__GFX11 = 0x00000115, - SPI_PERF_VWC_CSGN_WR__GFX11 = 0x00000116, - SPI_PERF_VWC_CSN_WR__GFX11 = 0x00000117, - SPI_PERF_EXP_THROT_UPSTEP__GFX11 = 0x00000118, - SPI_PERF_EXP_THROT_DOWNSTEP__GFX11 = 0x00000119, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX11 = 0x0000011a, - SPI_PERF_BUSY__GFX11 = 0x0000011b, -#endif - SPI_PERF_VS_WINDOW_VALID__HASHWVS = 0x00000000, - SPI_PERF_VS_BUSY__HASHWVS = 0x00000001, - SPI_PERF_VS_FIRST_WAVE__HASHWVS = 0x00000002, - SPI_PERF_VS_LAST_WAVE__HASHWVS = 0x00000003, - SPI_PERF_VS_LSHS_DEALLOC__HASHWVS = 0x00000004, -#if CHIP_HDR_NAVI21 - SPI_PERF_RA_REQ_NO_ALLOC_ES__NV21 = 0x00000092, - SPI_PERF_RA_REQ_NO_ALLOC_LS__NV21 = 0x00000094, - SPI_PERF_RA_RES_STALL_ES__NV21 = 0x0000009a, - SPI_PERF_RA_RES_STALL_LS__NV21 = 0x0000009c, - SPI_PERF_RA_TMP_STALL_ES__NV21 = 0x000000a2, - SPI_PERF_RA_TMP_STALL_LS__NV21 = 0x000000a4, - SPI_PERF_RA_WAVE_SIMD_FULL_ES__NV21 = 0x000000aa, - SPI_PERF_RA_WAVE_SIMD_FULL_LS__NV21 = 0x000000ac, - SPI_PERF_RA_VGPR_SIMD_FULL_ES__NV21 = 0x000000b2, - SPI_PERF_RA_VGPR_SIMD_FULL_LS__NV21 = 0x000000b5, - SPI_PERF_RA_SGPR_SIMD_FULL_PS__NV21 = 0x000000b8, - SPI_PERF_RA_SGPR_SIMD_FULL_VS__NV21 = 0x000000b9, - 
SPI_PERF_RA_SGPR_SIMD_FULL_ES__NV21 = 0x000000ba, - SPI_PERF_RA_SGPR_SIMD_FULL_GS__NV21 = 0x000000bb, - SPI_PERF_RA_SGPR_SIMD_FULL_LS__NV21 = 0x000000bc, - SPI_PERF_RA_SGPR_SIMD_FULL_HS__NV21 = 0x000000bd, - SPI_PERF_RA_SGPR_SIMD_FULL_CSG__NV21 = 0x000000be, - SPI_PERF_RA_SGPR_SIMD_FULL_CSN__NV21 = 0x000000bf, - SPI_PERF_RA_LDS_CU_FULL_LS__NV21 = 0x000000c1, - SPI_PERF_RA_LDS_CU_FULL_ES__NV21 = 0x000000c3, - SPI_PERF_RA_WVLIM_STALL_ES__NV21 = 0x000000d0, - SPI_PERF_RA_WVLIM_STALL_LS__NV21 = 0x000000d2, - SPI_PERF_RA_ES_LOCK__NV21 = 0x000000d8, - SPI_PERF_RA_LS_LOCK__NV21 = 0x000000da, - SPI_PERF_SWC_PS_WR__NV21 = 0x00000123, - SPI_PERF_SWC_VS_WR__NV21 = 0x00000124, - SPI_PERF_SWC_ES_WR__NV21 = 0x00000125, - SPI_PERF_SWC_LS_WR__NV21 = 0x00000127, - SPI_PERF_VWC_ES_WR__NV21 = 0x0000012d, - SPI_PERF_VWC_LS_WR__NV21 = 0x0000012f, - SPI_PERF_ES_WINDOW_VALID__NV21 = 0x00000133, - SPI_PERF_ES_BUSY__NV21 = 0x00000134, - SPI_PERF_ES_CRAWLER_STALL__NV21 = 0x00000135, - SPI_PERF_ES_FIRST_WAVE__NV21 = 0x00000136, - SPI_PERF_ES_LAST_WAVE__NV21 = 0x00000137, - SPI_PERF_ES_LSHS_DEALLOC__NV21 = 0x00000138, - SPI_PERF_ES_EVENT_WAVE__NV21 = 0x00000139, - SPI_PERF_ES_WAVE__NV21 = 0x0000013a, - SPI_PERF_ES_PERS_UPD_FULL0__NV21 = 0x0000013b, - SPI_PERF_ES_PERS_UPD_FULL1__NV21 = 0x0000013c, - SPI_PERF_ES_FIRST_SUBGRP__NV21 = 0x0000013d, - SPI_PERF_ES_LAST_SUBGRP__NV21 = 0x0000013e, - SPI_PERF_LS_WINDOW_VALID__NV21 = 0x0000013f, - SPI_PERF_LS_BUSY__NV21 = 0x00000140, - SPI_PERF_LS_CRAWLER_STALL__NV21 = 0x00000141, - SPI_PERF_LS_FIRST_WAVE__NV21 = 0x00000142, - SPI_PERF_LS_LAST_WAVE__NV21 = 0x00000143, - SPI_PERF_LS_OFFCHIP_LDS_STALL__NV21 = 0x00000144, - SPI_PERF_LS_EVENT_WAVE__NV21 = 0x00000145, - SPI_PERF_LS_WAVE__NV21 = 0x00000146, - SPI_PERF_LS_PERS_UPD_FULL0__NV21 = 0x00000147, - SPI_PERF_LS_PERS_UPD_FULL1__NV21 = 0x00000148, -#endif -#if CHIP_HDR_NAVI22 - SPI_PERF_RA_REQ_NO_ALLOC_ES__NV22 = 0x00000092, - SPI_PERF_RA_REQ_NO_ALLOC_LS__NV22 = 0x00000094, - 
SPI_PERF_RA_RES_STALL_ES__NV22 = 0x0000009a, - SPI_PERF_RA_RES_STALL_LS__NV22 = 0x0000009c, - SPI_PERF_RA_TMP_STALL_ES__NV22 = 0x000000a2, - SPI_PERF_RA_TMP_STALL_LS__NV22 = 0x000000a4, - SPI_PERF_RA_WAVE_SIMD_FULL_ES__NV22 = 0x000000aa, - SPI_PERF_RA_WAVE_SIMD_FULL_LS__NV22 = 0x000000ac, - SPI_PERF_RA_VGPR_SIMD_FULL_ES__NV22 = 0x000000b2, - SPI_PERF_RA_VGPR_SIMD_FULL_LS__NV22 = 0x000000b5, - SPI_PERF_RA_SGPR_SIMD_FULL_PS__NV22 = 0x000000b8, - SPI_PERF_RA_SGPR_SIMD_FULL_VS__NV22 = 0x000000b9, - SPI_PERF_RA_SGPR_SIMD_FULL_ES__NV22 = 0x000000ba, - SPI_PERF_RA_SGPR_SIMD_FULL_GS__NV22 = 0x000000bb, - SPI_PERF_RA_SGPR_SIMD_FULL_LS__NV22 = 0x000000bc, - SPI_PERF_RA_SGPR_SIMD_FULL_HS__NV22 = 0x000000bd, - SPI_PERF_RA_SGPR_SIMD_FULL_CSG__NV22 = 0x000000be, - SPI_PERF_RA_SGPR_SIMD_FULL_CSN__NV22 = 0x000000bf, - SPI_PERF_RA_LDS_CU_FULL_LS__NV22 = 0x000000c1, - SPI_PERF_RA_LDS_CU_FULL_ES__NV22 = 0x000000c3, - SPI_PERF_RA_WVLIM_STALL_ES__NV22 = 0x000000d0, - SPI_PERF_RA_WVLIM_STALL_LS__NV22 = 0x000000d2, - SPI_PERF_RA_ES_LOCK__NV22 = 0x000000d8, - SPI_PERF_RA_LS_LOCK__NV22 = 0x000000da, - SPI_PERF_SWC_PS_WR__NV22 = 0x00000123, - SPI_PERF_SWC_VS_WR__NV22 = 0x00000124, - SPI_PERF_SWC_ES_WR__NV22 = 0x00000125, - SPI_PERF_SWC_LS_WR__NV22 = 0x00000127, - SPI_PERF_VWC_ES_WR__NV22 = 0x0000012d, - SPI_PERF_VWC_LS_WR__NV22 = 0x0000012f, - SPI_PERF_ES_WINDOW_VALID__NV22 = 0x00000133, - SPI_PERF_ES_BUSY__NV22 = 0x00000134, - SPI_PERF_ES_CRAWLER_STALL__NV22 = 0x00000135, - SPI_PERF_ES_FIRST_WAVE__NV22 = 0x00000136, - SPI_PERF_ES_LAST_WAVE__NV22 = 0x00000137, - SPI_PERF_ES_LSHS_DEALLOC__NV22 = 0x00000138, - SPI_PERF_ES_EVENT_WAVE__NV22 = 0x00000139, - SPI_PERF_ES_WAVE__NV22 = 0x0000013a, - SPI_PERF_ES_PERS_UPD_FULL0__NV22 = 0x0000013b, - SPI_PERF_ES_PERS_UPD_FULL1__NV22 = 0x0000013c, - SPI_PERF_ES_FIRST_SUBGRP__NV22 = 0x0000013d, - SPI_PERF_ES_LAST_SUBGRP__NV22 = 0x0000013e, - SPI_PERF_LS_WINDOW_VALID__NV22 = 0x0000013f, - SPI_PERF_LS_BUSY__NV22 = 0x00000140, - 
SPI_PERF_LS_CRAWLER_STALL__NV22 = 0x00000141, - SPI_PERF_LS_FIRST_WAVE__NV22 = 0x00000142, - SPI_PERF_LS_LAST_WAVE__NV22 = 0x00000143, - SPI_PERF_LS_OFFCHIP_LDS_STALL__NV22 = 0x00000144, - SPI_PERF_LS_EVENT_WAVE__NV22 = 0x00000145, - SPI_PERF_LS_WAVE__NV22 = 0x00000146, - SPI_PERF_LS_PERS_UPD_FULL0__NV22 = 0x00000147, - SPI_PERF_LS_PERS_UPD_FULL1__NV22 = 0x00000148, -#endif -#if CHIP_HDR_NAVI23 - SPI_PERF_GS_NGG_STALL_MSG_VAL__NV23 = 0x00000123, - SPI_PERF_SWC_PS_WR__NV23 = 0x00000124, - SPI_PERF_SWC_VS_WR__NV23 = 0x00000125, -#endif -#if CHIP_HDR_NAVI24 - SPI_PERF_GS_NGG_STALL_MSG_VAL__NV24 = 0x00000123, - SPI_PERF_SWC_PS_WR__NV24 = 0x00000124, - SPI_PERF_SWC_VS_WR__NV24 = 0x00000125, -#endif -#if CHIP_HDR_NAVI32 - SPI_PERF_GS_GRP_LIFETIME_SAMPLE__NV32 = 0x00000012, -#endif -#if CHIP_HDR_NAVI33 - SPI_PERF_GS_GRP_LIFETIME_SAMPLE__NV33 = 0x00000012, -#endif - SPI_PERF_GS_NGG_STALL_MSG_VAL__RAPHAEL = 0x00000123, - SPI_PERF_SWC_PS_WR__RAPHAEL = 0x00000124, - SPI_PERF_SWC_VS_WR__RAPHAEL = 0x00000125, - SPI_PERF_GS_NGG_STALL_MSG_VAL__REMBRANDT = 0x00000123, - SPI_PERF_SWC_PS_WR__REMBRANDT = 0x00000124, - SPI_PERF_SWC_VS_WR__REMBRANDT = 0x00000125, -} SPI_PERFCNT_SEL; - -constexpr unsigned int MaxSpiPerfcntSelGfx09 = SPI_PERF_VWC_CSC_WR__GFX09; -constexpr unsigned int MaxSpiPerfcntSelGfx101 = SPI_PERF_LS_PERS_UPD_FULL1__GFX101; -constexpr unsigned int MaxSpiPerfcntSelGfx103 = SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxSpiPerfcntSelGfx11 = SPI_PERF_BUSY__GFX11; -#endif - -typedef enum SPI_PNT_SPRITE_OVERRIDE { - SPI_PNT_SPRITE_SEL_0 = 0x00000000, - SPI_PNT_SPRITE_SEL_1 = 0x00000001, - SPI_PNT_SPRITE_SEL_S = 0x00000002, - SPI_PNT_SPRITE_SEL_T = 0x00000003, - SPI_PNT_SPRITE_SEL_NONE = 0x00000004, -} SPI_PNT_SPRITE_OVERRIDE; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum SPI_PS_LDS_GROUP_SIZE { - SPI_PS_LDS_GROUP_1 = 
0x00000000, - SPI_PS_LDS_GROUP_2 = 0x00000001, - SPI_PS_LDS_GROUP_4 = 0x00000002, -} SPI_PS_LDS_GROUP_SIZE; -#endif - -typedef enum SPI_SAMPLE_CNTL { - CENTROIDS_ONLY = 0x00000000, - CENTERS_ONLY = 0x00000001, - CENTROIDS_AND_CENTERS = 0x00000002, - UNDEF = 0x00000003, -} SPI_SAMPLE_CNTL; - -typedef enum SPI_SHADER_EX_FORMAT { - SPI_SHADER_ZERO = 0x00000000, - SPI_SHADER_32_R = 0x00000001, - SPI_SHADER_32_GR = 0x00000002, - SPI_SHADER_32_AR = 0x00000003, - SPI_SHADER_FP16_ABGR = 0x00000004, - SPI_SHADER_UNORM16_ABGR = 0x00000005, - SPI_SHADER_SNORM16_ABGR = 0x00000006, - SPI_SHADER_UINT16_ABGR = 0x00000007, - SPI_SHADER_SINT16_ABGR = 0x00000008, - SPI_SHADER_32_ABGR = 0x00000009, -} SPI_SHADER_EX_FORMAT; - -typedef enum SPI_SHADER_FORMAT { - SPI_SHADER_NONE = 0x00000000, - SPI_SHADER_1COMP = 0x00000001, - SPI_SHADER_2COMP = 0x00000002, - SPI_SHADER_4COMPRESS = 0x00000003, - SPI_SHADER_4COMP = 0x00000004, -} SPI_SHADER_FORMAT; - -typedef enum SPM_PERFMON_STATE { - STRM_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, - STRM_PERFMON_STATE_START_COUNTING = 0x00000001, - STRM_PERFMON_STATE_STOP_COUNTING = 0x00000002, - STRM_PERFMON_STATE_RESERVED_3 = 0x00000003, - STRM_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, - STRM_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} SPM_PERFMON_STATE; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum SQG_PERF_SEL { - SQG_PERF_SEL_NONE = 0x00000000, - SQG_PERF_SEL_MSG_BUS_BUSY = 0x00000001, - SQG_PERF_SEL_EXP_REQ0_BUS_BUSY = 0x00000002, - SQG_PERF_SEL_EXP_REQ1_BUS_BUSY = 0x00000003, - SQG_PERF_SEL_EXP_BUS0_BUSY = 0x00000004, - SQG_PERF_SEL_EXP_BUS1_BUSY = 0x00000005, - SQG_PERF_SEL_TTRACE_REQS = 0x00000006, - SQG_PERF_SEL_TTRACE_INFLIGHT_REQS = 0x00000007, - SQG_PERF_SEL_TTRACE_STALL = 0x00000008, - SQG_PERF_SEL_TTRACE_LOST_PACKETS = 0x00000009, - SQG_PERF_SEL_WAVES_INITIAL_PREFETCH = 0x0000000a, - SQG_PERF_SEL_EVENTS = 0x0000000b, - SQG_PERF_SEL_WAVES_RESTORED = 0x0000000c, - 
SQG_PERF_SEL_WAVES_SAVED = 0x0000000d, - SQG_PERF_SEL_ACCUM_PREV = 0x0000000e, - SQG_PERF_SEL_CYCLES = 0x0000000f, - SQG_PERF_SEL_BUSY_CYCLES = 0x00000010, - SQG_PERF_SEL_WAVE_CYCLES = 0x00000011, - SQG_PERF_SEL_MSG = 0x00000012, - SQG_PERF_SEL_MSG_INTERRUPT = 0x00000013, - SQG_PERF_SEL_WAVES = 0x00000014, - SQG_PERF_SEL_WAVES_32 = 0x00000015, - SQG_PERF_SEL_WAVES_64 = 0x00000016, - SQG_PERF_SEL_LEVEL_WAVES = 0x00000017, - SQG_PERF_SEL_ITEMS = 0x00000018, - SQG_PERF_SEL_WAVE32_ITEMS = 0x00000019, - SQG_PERF_SEL_WAVE64_ITEMS = 0x0000001a, - SQG_PERF_SEL_PS_QUADS = 0x0000001b, - SQG_PERF_SEL_WAVES_EQ_64 = 0x0000001c, - SQG_PERF_SEL_WAVES_EQ_32 = 0x0000001d, - SQG_PERF_SEL_WAVES_LT_64 = 0x0000001e, - SQG_PERF_SEL_WAVES_LT_48 = 0x0000001f, - SQG_PERF_SEL_WAVES_LT_32 = 0x00000020, - SQG_PERF_SEL_WAVES_LT_16 = 0x00000021, - SQG_PERF_SEL_WAVES_STARTED = 0x00000022, - SQG_PERF_SEL_WAVES_ENDED = 0x00000023, - SQG_PERF_SEL_DUMMY_LAST = 0x00000024, -} SQG_PERF_SEL; - -constexpr unsigned int MaxSqgPerfSel = SQG_PERF_SEL_DUMMY_LAST; -#endif - -typedef enum SQ_IMG_FILTER_TYPE { - SQ_IMG_FILTER_MODE_BLEND = 0x00000000, - SQ_IMG_FILTER_MODE_MIN = 0x00000001, - SQ_IMG_FILTER_MODE_MAX = 0x00000002, -} SQ_IMG_FILTER_TYPE; - -typedef enum SQ_LLC_CTL { - SQ_LLC_0 = 0x00000000, - SQ_LLC_1 = 0x00000001, - SQ_LLC_RSVD_2 = 0x00000002, - SQ_LLC_BYPASS = 0x00000003, -} SQ_LLC_CTL; - -typedef enum SQ_OOB_SELECT { - SQ_OOB_INDEX_AND_OFFSET = 0x00000000, - SQ_OOB_INDEX_ONLY = 0x00000001, - SQ_OOB_NUM_RECORDS_0 = 0x00000002, - SQ_OOB_COMPLETE = 0x00000003, -} SQ_OOB_SELECT; - -typedef enum SQ_PERF_SEL { - SQ_PERF_SEL_NONE = 0x00000000, - SQ_PERF_SEL_ACCUM_PREV = 0x00000001, - SQ_PERF_SEL_CYCLES = 0x00000002, - SQ_PERF_SEL_BUSY_CYCLES = 0x00000003, - SQ_PERF_SEL_WAVES = 0x00000004, - SQ_PERF_SEL_DUMMY_LAST = 0x000000ff, - SQ_PERF_SEL_LEVEL_WAVES__GFX09 = 0x00000005, - SQ_PERF_SEL_WAVES_EQ_64__GFX09 = 0x00000006, - SQ_PERF_SEL_WAVES_LT_64__GFX09 = 0x00000007, - SQ_PERF_SEL_WAVES_LT_48__GFX09 = 
0x00000008, - SQ_PERF_SEL_WAVES_LT_32__GFX09 = 0x00000009, - SQ_PERF_SEL_WAVES_LT_16__GFX09 = 0x0000000a, - SQ_PERF_SEL_WAVES_CU__GFX09 = 0x0000000b, - SQ_PERF_SEL_LEVEL_WAVES_CU__GFX09 = 0x0000000c, - SQ_PERF_SEL_BUSY_CU_CYCLES__GFX09 = 0x0000000d, - SQ_PERF_SEL_ITEMS__GFX09 = 0x0000000e, - SQ_PERF_SEL_QUADS__GFX09 = 0x0000000f, - SQ_PERF_SEL_EVENTS__GFX09 = 0x00000010, - SQ_PERF_SEL_SURF_SYNCS__GFX09 = 0x00000011, - SQ_PERF_SEL_TTRACE_REQS__GFX09 = 0x00000012, - SQ_PERF_SEL_TTRACE_INFLIGHT_REQS__GFX09 = 0x00000013, - SQ_PERF_SEL_TTRACE_STALL__GFX09 = 0x00000014, - SQ_PERF_SEL_MSG_CNTR__GFX09 = 0x00000015, - SQ_PERF_SEL_MSG_PERF__GFX09 = 0x00000016, - SQ_PERF_SEL_MSG_GSCNT__GFX09 = 0x00000017, - SQ_PERF_SEL_MSG_INTERRUPT__GFX09 = 0x00000018, - SQ_PERF_SEL_INSTS__GFX09 = 0x00000019, - SQ_PERF_SEL_INSTS_VALU__GFX09 = 0x0000001a, - SQ_PERF_SEL_INSTS_VMEM_WR__GFX09 = 0x0000001b, - SQ_PERF_SEL_INSTS_VMEM_RD__GFX09 = 0x0000001c, - SQ_PERF_SEL_INSTS_VMEM__GFX09 = 0x0000001d, - SQ_PERF_SEL_INSTS_SALU__GFX09 = 0x0000001e, - SQ_PERF_SEL_INSTS_SMEM__GFX09 = 0x0000001f, - SQ_PERF_SEL_INSTS_FLAT__GFX09 = 0x00000020, - SQ_PERF_SEL_INSTS_FLAT_LDS_ONLY__GFX09 = 0x00000021, - SQ_PERF_SEL_INSTS_LDS__GFX09 = 0x00000022, - SQ_PERF_SEL_INSTS_GDS__GFX09 = 0x00000023, - SQ_PERF_SEL_INSTS_EXP__GFX09 = 0x00000024, - SQ_PERF_SEL_INSTS_EXP_GDS__GFX09 = 0x00000025, - SQ_PERF_SEL_INSTS_BRANCH__GFX09 = 0x00000026, - SQ_PERF_SEL_INSTS_SENDMSG__GFX09 = 0x00000027, - SQ_PERF_SEL_INSTS_VSKIPPED__GFX09 = 0x00000028, - SQ_PERF_SEL_INST_LEVEL_VMEM__GFX09 = 0x00000029, - SQ_PERF_SEL_INST_LEVEL_SMEM__GFX09 = 0x0000002a, - SQ_PERF_SEL_INST_LEVEL_LDS__GFX09 = 0x0000002b, - SQ_PERF_SEL_INST_LEVEL_GDS__GFX09 = 0x0000002c, - SQ_PERF_SEL_INST_LEVEL_EXP__GFX09 = 0x0000002d, - SQ_PERF_SEL_WAVE_CYCLES__GFX09 = 0x0000002e, - SQ_PERF_SEL_WAVE_READY__GFX09 = 0x0000002f, - SQ_PERF_SEL_WAIT_CNT_VM__GFX09 = 0x00000030, - SQ_PERF_SEL_WAIT_CNT_LGKM__GFX09 = 0x00000031, - SQ_PERF_SEL_WAIT_CNT_EXP__GFX09 = 0x00000032, - 
SQ_PERF_SEL_WAIT_CNT_ANY__GFX09 = 0x00000033, - SQ_PERF_SEL_WAIT_BARRIER__GFX09 = 0x00000034, - SQ_PERF_SEL_WAIT_EXP_ALLOC__GFX09 = 0x00000035, - SQ_PERF_SEL_WAIT_SLEEP__GFX09 = 0x00000036, - SQ_PERF_SEL_WAIT_SLEEP_XNACK__GFX09 = 0x00000037, - SQ_PERF_SEL_WAIT_OTHER__GFX09 = 0x00000038, - SQ_PERF_SEL_WAIT_ANY__GFX09 = 0x00000039, - SQ_PERF_SEL_WAIT_TTRACE__GFX09 = 0x0000003a, - SQ_PERF_SEL_WAIT_IFETCH__GFX09 = 0x0000003b, - SQ_PERF_SEL_WAIT_INST_ANY__GFX09 = 0x0000003c, - SQ_PERF_SEL_WAIT_INST_VMEM__GFX09 = 0x0000003d, - SQ_PERF_SEL_WAIT_INST_SCA__GFX09 = 0x0000003e, - SQ_PERF_SEL_WAIT_INST_LDS__GFX09 = 0x0000003f, - SQ_PERF_SEL_WAIT_INST_VALU__GFX09 = 0x00000040, - SQ_PERF_SEL_WAIT_INST_EXP_GDS__GFX09 = 0x00000041, - SQ_PERF_SEL_WAIT_INST_MISC__GFX09 = 0x00000042, - SQ_PERF_SEL_WAIT_INST_FLAT__GFX09 = 0x00000043, - SQ_PERF_SEL_ACTIVE_INST_ANY__GFX09 = 0x00000044, - SQ_PERF_SEL_ACTIVE_INST_VMEM__GFX09 = 0x00000045, - SQ_PERF_SEL_ACTIVE_INST_LDS__GFX09 = 0x00000046, - SQ_PERF_SEL_ACTIVE_INST_VALU__GFX09 = 0x00000047, - SQ_PERF_SEL_ACTIVE_INST_SCA__GFX09 = 0x00000048, - SQ_PERF_SEL_ACTIVE_INST_EXP_GDS__GFX09 = 0x00000049, - SQ_PERF_SEL_ACTIVE_INST_MISC__GFX09 = 0x0000004a, - SQ_PERF_SEL_ACTIVE_INST_FLAT__GFX09 = 0x0000004b, - SQ_PERF_SEL_INST_CYCLES_VMEM_WR__GFX09 = 0x0000004c, - SQ_PERF_SEL_INST_CYCLES_VMEM_RD__GFX09 = 0x0000004d, - SQ_PERF_SEL_INST_CYCLES_VMEM_ADDR__GFX09 = 0x0000004e, - SQ_PERF_SEL_INST_CYCLES_VMEM_DATA__GFX09 = 0x0000004f, - SQ_PERF_SEL_INST_CYCLES_VMEM_CMD__GFX09 = 0x00000050, - SQ_PERF_SEL_INST_CYCLES_EXP__GFX09 = 0x00000051, - SQ_PERF_SEL_INST_CYCLES_GDS__GFX09 = 0x00000052, - SQ_PERF_SEL_INST_CYCLES_SMEM__GFX09 = 0x00000053, - SQ_PERF_SEL_INST_CYCLES_SALU__GFX09 = 0x00000054, - SQ_PERF_SEL_THREAD_CYCLES_VALU__GFX09 = 0x00000055, - SQ_PERF_SEL_THREAD_CYCLES_VALU_MAX__GFX09 = 0x00000056, - SQ_PERF_SEL_IFETCH__GFX09 = 0x00000057, - SQ_PERF_SEL_IFETCH_LEVEL__GFX09 = 0x00000058, - SQ_PERF_SEL_CBRANCH_FORK__GFX09 = 0x00000059, - 
SQ_PERF_SEL_CBRANCH_FORK_SPLIT__GFX09 = 0x0000005a, - SQ_PERF_SEL_VALU_LDS_DIRECT_RD__GFX09 = 0x0000005b, - SQ_PERF_SEL_VALU_LDS_INTERP_OP__GFX09 = 0x0000005c, - SQ_PERF_SEL_LDS_BANK_CONFLICT__GFX09 = 0x0000005d, - SQ_PERF_SEL_LDS_ADDR_CONFLICT__GFX09 = 0x0000005e, - SQ_PERF_SEL_LDS_UNALIGNED_STALL__GFX09 = 0x0000005f, - SQ_PERF_SEL_LDS_MEM_VIOLATIONS__GFX09 = 0x00000060, - SQ_PERF_SEL_LDS_ATOMIC_RETURN__GFX09 = 0x00000061, - SQ_PERF_SEL_LDS_IDX_ACTIVE__GFX09 = 0x00000062, - SQ_PERF_SEL_VALU_DEP_STALL__GFX09 = 0x00000063, - SQ_PERF_SEL_VALU_STARVE__GFX09 = 0x00000064, - SQ_PERF_SEL_TA_STARVE__GFX09 = 0x00000065, - SQ_PERF_SEL_EXP_REQ_FIFO_FULL__GFX09 = 0x00000066, - SQ_PERF_SEL_LDS_DATA_FIFO_FULL__GFX09 = 0x00000067, - SQ_PERF_SEL_LDS_CMD_FIFO_FULL__GFX09 = 0x00000068, - SQ_PERF_SEL_VMEM_TA_ADDR_FIFO_FULL__GFX09 = 0x00000069, - SQ_PERF_SEL_VMEM_TA_CMD_FIFO_FULL__GFX09 = 0x0000006a, - SQ_PERF_SEL_VMEM_EX_DATA_REG_BUSY__GFX09 = 0x0000006b, - SQ_PERF_SEL_VMEM_WR_TA_DATA_FIFO_FULL__GFX09 = 0x0000006c, - SQ_PERF_SEL_VALU_SRC_C_CONFLICT__GFX09 = 0x0000006d, - SQ_PERF_SEL_VMEM_RD_SRC_CD_CONFLICT__GFX09 = 0x0000006e, - SQ_PERF_SEL_VMEM_WR_SRC_CD_CONFLICT__GFX09 = 0x0000006f, - SQ_PERF_SEL_FLAT_SRC_CD_CONFLICT__GFX09 = 0x00000070, - SQ_PERF_SEL_LDS_SRC_CD_CONFLICT__GFX09 = 0x00000071, - SQ_PERF_SEL_SRC_CD_BUSY__GFX09 = 0x00000072, - SQ_PERF_SEL_PT_POWER_STALL__GFX09 = 0x00000073, - SQ_PERF_SEL_USER0__GFX09 = 0x00000074, - SQ_PERF_SEL_USER1__GFX09 = 0x00000075, - SQ_PERF_SEL_USER2__GFX09 = 0x00000076, - SQ_PERF_SEL_USER3__GFX09 = 0x00000077, - SQ_PERF_SEL_USER4__GFX09 = 0x00000078, - SQ_PERF_SEL_USER5__GFX09 = 0x00000079, - SQ_PERF_SEL_USER6__GFX09 = 0x0000007a, - SQ_PERF_SEL_USER7__GFX09 = 0x0000007b, - SQ_PERF_SEL_USER8__GFX09 = 0x0000007c, - SQ_PERF_SEL_USER9__GFX09 = 0x0000007d, - SQ_PERF_SEL_USER10__GFX09 = 0x0000007e, - SQ_PERF_SEL_USER11__GFX09 = 0x0000007f, - SQ_PERF_SEL_USER12__GFX09 = 0x00000080, - SQ_PERF_SEL_USER13__GFX09 = 0x00000081, - SQ_PERF_SEL_USER14__GFX09 
= 0x00000082, - SQ_PERF_SEL_USER15__GFX09 = 0x00000083, - SQ_PERF_SEL_USER_LEVEL0__GFX09 = 0x00000084, - SQ_PERF_SEL_USER_LEVEL1__GFX09 = 0x00000085, - SQ_PERF_SEL_USER_LEVEL2__GFX09 = 0x00000086, - SQ_PERF_SEL_USER_LEVEL3__GFX09 = 0x00000087, - SQ_PERF_SEL_USER_LEVEL4__GFX09 = 0x00000088, - SQ_PERF_SEL_USER_LEVEL5__GFX09 = 0x00000089, - SQ_PERF_SEL_USER_LEVEL6__GFX09 = 0x0000008a, - SQ_PERF_SEL_USER_LEVEL7__GFX09 = 0x0000008b, - SQ_PERF_SEL_USER_LEVEL8__GFX09 = 0x0000008c, - SQ_PERF_SEL_USER_LEVEL9__GFX09 = 0x0000008d, - SQ_PERF_SEL_USER_LEVEL10__GFX09 = 0x0000008e, - SQ_PERF_SEL_USER_LEVEL11__GFX09 = 0x0000008f, - SQ_PERF_SEL_USER_LEVEL12__GFX09 = 0x00000090, - SQ_PERF_SEL_USER_LEVEL13__GFX09 = 0x00000091, - SQ_PERF_SEL_USER_LEVEL14__GFX09 = 0x00000092, - SQ_PERF_SEL_USER_LEVEL15__GFX09 = 0x00000093, - SQ_PERF_SEL_POWER_VALU__GFX09 = 0x00000094, - SQ_PERF_SEL_POWER_VALU0__GFX09 = 0x00000095, - SQ_PERF_SEL_POWER_VALU1__GFX09 = 0x00000096, - SQ_PERF_SEL_POWER_VALU2__GFX09 = 0x00000097, - SQ_PERF_SEL_POWER_GPR_RD__GFX09 = 0x00000098, - SQ_PERF_SEL_POWER_GPR_WR__GFX09 = 0x00000099, - SQ_PERF_SEL_POWER_LDS_BUSY__GFX09 = 0x0000009a, - SQ_PERF_SEL_POWER_ALU_BUSY__GFX09 = 0x0000009b, - SQ_PERF_SEL_POWER_TEX_BUSY__GFX09 = 0x0000009c, - SQ_PERF_SEL_ACCUM_PREV_HIRES__GFX09 = 0x0000009d, - SQ_PERF_SEL_WAVES_RESTORED__GFX09 = 0x0000009e, - SQ_PERF_SEL_WAVES_SAVED__GFX09 = 0x0000009f, - SQ_PERF_SEL_INSTS_SMEM_NORM__GFX09 = 0x000000a0, - SQ_PERF_SEL_XNACK_FIRST__GFX09 = 0x000000a1, - SQ_PERF_SEL_XNACK_ALL__GFX09 = 0x000000a2, - SQ_PERF_SEL_XNACK_FIFO_FULL__GFX09 = 0x000000a3, - SQ_PERF_SEL_IFETCH_XNACK__GFX09 = 0x000000a4, - SQ_PERF_SEL_TLB_SHOOTDOWN__GFX09 = 0x000000a5, - SQ_PERF_SEL_TLB_SHOOTDOWN_CYCLES__GFX09 = 0x000000a6, - SQ_PERF_SEL_INSTS_VMEM_WR_REPLAY__GFX09 = 0x000000a7, - SQ_PERF_SEL_INSTS_VMEM_RD_REPLAY__GFX09 = 0x000000a8, - SQ_PERF_SEL_INSTS_VMEM_REPLAY__GFX09 = 0x000000a9, - SQ_PERF_SEL_INSTS_SMEM_REPLAY__GFX09 = 0x000000aa, - 
SQ_PERF_SEL_INSTS_SMEM_NORM_REPLAY__GFX09 = 0x000000ab, - SQ_PERF_SEL_INSTS_FLAT_REPLAY__GFX09 = 0x000000ac, - SQ_PERF_SEL_UTCL1_TRANSLATION_MISS__GFX09 = 0x000000ad, - SQ_PERF_SEL_UTCL1_PERMISSION_MISS__GFX09 = 0x000000ae, - SQ_PERF_SEL_UTCL1_TRANSLATION_HIT_EVENT__GFX09 = 0x000000af, - SQ_PERF_SEL_UTCL1_REQUEST__GFX09 = 0x000000b0, - SQ_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL__GFX09 = 0x000000b1, - SQ_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX__GFX09 = 0x000000b2, - SQ_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT__GFX09 = 0x000000b3, - SQ_PERF_SEL_UTCL1_LFIFO_FULL__GFX09 = 0x000000b4, - SQ_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES__GFX09 = 0x000000b5, - SQ_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX09 = 0x000000b6, - SQ_PERF_SEL_DUMMY_END__GFX09 = 0x000000b7, - SQC_PERF_SEL_ICACHE_INPUT_VALID_READY__GFX09 = 0x00000100, - SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB__GFX09 = 0x00000101, - SQC_PERF_SEL_ICACHE_INPUT_VALIDB__GFX09 = 0x00000102, - SQC_PERF_SEL_DCACHE_INPUT_VALID_READY__GFX09 = 0x00000103, - SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB__GFX09 = 0x00000104, - SQC_PERF_SEL_DCACHE_INPUT_VALIDB__GFX09 = 0x00000105, - SQC_PERF_SEL_TC_REQ__GFX09 = 0x00000106, - SQC_PERF_SEL_TC_INST_REQ__GFX09 = 0x00000107, - SQC_PERF_SEL_TC_DATA_READ_REQ__GFX09 = 0x00000108, - SQC_PERF_SEL_TC_DATA_WRITE_REQ__GFX09 = 0x00000109, - SQC_PERF_SEL_TC_DATA_ATOMIC_REQ__GFX09 = 0x0000010a, - SQC_PERF_SEL_TC_STALL__GFX09 = 0x0000010b, - SQC_PERF_SEL_TC_STARVE__GFX09 = 0x0000010c, - SQC_PERF_SEL_ICACHE_BUSY_CYCLES__GFX09 = 0x0000010d, - SQC_PERF_SEL_ICACHE_REQ__GFX09 = 0x0000010e, - SQC_PERF_SEL_ICACHE_HITS__GFX09 = 0x0000010f, - SQC_PERF_SEL_ICACHE_MISSES__GFX09 = 0x00000110, - SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE__GFX09 = 0x00000111, - SQC_PERF_SEL_ICACHE_INVAL_INST__GFX09 = 0x00000112, - SQC_PERF_SEL_ICACHE_INVAL_ASYNC__GFX09 = 0x00000113, - SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT__GFX09 = 0x00000114, - SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB__GFX09 = 0x00000115, - SQC_PERF_SEL_ICACHE_CACHE_STALLED__GFX09 = 
0x00000116, - SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_NONZERO__GFX09 = 0x00000117, - SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX__GFX09 = 0x00000118, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT__GFX09 = 0x00000119, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_MISS_FIFO__GFX09 = 0x0000011a, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_HIT_FIFO__GFX09 = 0x0000011b, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_TC_IF__GFX09 = 0x0000011c, - SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX09 = 0x0000011d, - SQC_PERF_SEL_ICACHE_PREFETCH_1__GFX09 = 0x0000011e, - SQC_PERF_SEL_ICACHE_PREFETCH_2__GFX09 = 0x0000011f, - SQC_PERF_SEL_ICACHE_PREFETCH_FILTERED__GFX09 = 0x00000120, - SQC_PERF_SEL_DCACHE_BUSY_CYCLES__GFX09 = 0x00000121, - SQC_PERF_SEL_DCACHE_REQ__GFX09 = 0x00000122, - SQC_PERF_SEL_DCACHE_HITS__GFX09 = 0x00000123, - SQC_PERF_SEL_DCACHE_MISSES__GFX09 = 0x00000124, - SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE__GFX09 = 0x00000125, - SQC_PERF_SEL_DCACHE_HIT_LRU_READ__GFX09 = 0x00000126, - SQC_PERF_SEL_DCACHE_MISS_EVICT_READ__GFX09 = 0x00000127, - SQC_PERF_SEL_DCACHE_WC_LRU_WRITE__GFX09 = 0x00000128, - SQC_PERF_SEL_DCACHE_WT_EVICT_WRITE__GFX09 = 0x00000129, - SQC_PERF_SEL_DCACHE_ATOMIC__GFX09 = 0x0000012a, - SQC_PERF_SEL_DCACHE_VOLATILE__GFX09 = 0x0000012b, - SQC_PERF_SEL_DCACHE_INVAL_INST__GFX09 = 0x0000012c, - SQC_PERF_SEL_DCACHE_INVAL_ASYNC__GFX09 = 0x0000012d, - SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_INST__GFX09 = 0x0000012e, - SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_ASYNC__GFX09 = 0x0000012f, - SQC_PERF_SEL_DCACHE_WB_INST__GFX09 = 0x00000130, - SQC_PERF_SEL_DCACHE_WB_ASYNC__GFX09 = 0x00000131, - SQC_PERF_SEL_DCACHE_WB_VOLATILE_INST__GFX09 = 0x00000132, - SQC_PERF_SEL_DCACHE_WB_VOLATILE_ASYNC__GFX09 = 0x00000133, - SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT__GFX09 = 0x00000134, - SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB__GFX09 = 0x00000135, - SQC_PERF_SEL_DCACHE_CACHE_STALLED__GFX09 = 0x00000136, - SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX__GFX09 = 0x00000137, - 
SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT__GFX09 = 0x00000138, - SQC_PERF_SEL_DCACHE_CACHE_STALL_EVICT__GFX09 = 0x00000139, - SQC_PERF_SEL_DCACHE_CACHE_STALL_UNORDERED__GFX09 = 0x0000013a, - SQC_PERF_SEL_DCACHE_CACHE_STALL_ALLOC_UNAVAILABLE__GFX09 = 0x0000013b, - SQC_PERF_SEL_DCACHE_CACHE_STALL_FORCE_EVICT__GFX09 = 0x0000013c, - SQC_PERF_SEL_DCACHE_CACHE_STALL_MULTI_FLUSH__GFX09 = 0x0000013d, - SQC_PERF_SEL_DCACHE_CACHE_STALL_FLUSH_DONE__GFX09 = 0x0000013e, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_MISS_FIFO__GFX09 = 0x0000013f, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_HIT_FIFO__GFX09 = 0x00000140, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_TC_IF__GFX09 = 0x00000141, - SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX09 = 0x00000142, - SQC_PERF_SEL_DCACHE_REQ_READ_1__GFX09 = 0x00000143, - SQC_PERF_SEL_DCACHE_REQ_READ_2__GFX09 = 0x00000144, - SQC_PERF_SEL_DCACHE_REQ_READ_4__GFX09 = 0x00000145, - SQC_PERF_SEL_DCACHE_REQ_READ_8__GFX09 = 0x00000146, - SQC_PERF_SEL_DCACHE_REQ_READ_16__GFX09 = 0x00000147, - SQC_PERF_SEL_DCACHE_REQ_TIME__GFX09 = 0x00000148, - SQC_PERF_SEL_DCACHE_REQ_WRITE_1__GFX09 = 0x00000149, - SQC_PERF_SEL_DCACHE_REQ_WRITE_2__GFX09 = 0x0000014a, - SQC_PERF_SEL_DCACHE_REQ_WRITE_4__GFX09 = 0x0000014b, - SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE__GFX09 = 0x0000014c, - SQC_PERF_SEL_SQ_DCACHE_REQS__GFX09 = 0x0000014d, - SQC_PERF_SEL_DCACHE_FLAT_REQ__GFX09 = 0x0000014e, - SQC_PERF_SEL_DCACHE_NONFLAT_REQ__GFX09 = 0x0000014f, - SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL__GFX09 = 0x00000150, - SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL__GFX09 = 0x00000151, - SQC_PERF_SEL_TC_INFLIGHT_LEVEL__GFX09 = 0x00000152, - SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL__GFX09 = 0x00000153, - SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL__GFX09 = 0x00000154, - SQC_PERF_SEL_ICACHE_GATCL1_TRANSLATION_MISS__GFX09 = 0x00000155, - SQC_PERF_SEL_ICACHE_GATCL1_PERMISSION_MISS__GFX09 = 0x00000156, - SQC_PERF_SEL_ICACHE_GATCL1_TRANSLATION_HIT__GFX09 = 0x00000157, - SQC_PERF_SEL_ICACHE_GATCL1_REQUEST__GFX09 = 0x00000158, - 
SQC_PERF_SEL_ICACHE_GATCL1_STALL_INFLIGHT_MAX__GFX09 = 0x00000159, - SQC_PERF_SEL_ICACHE_GATCL1_STALL_LRU_INFLIGHT__GFX09 = 0x0000015a, - SQC_PERF_SEL_ICACHE_GATCL1_LFIFO_FULL__GFX09 = 0x0000015b, - SQC_PERF_SEL_ICACHE_GATCL1_STALL_LFIFO_NOT_RES__GFX09 = 0x0000015c, - SQC_PERF_SEL_ICACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS__GFX09 = 0x0000015d, - SQC_PERF_SEL_ICACHE_GATCL1_ATCL2_INFLIGHT__GFX09 = 0x0000015e, - SQC_PERF_SEL_ICACHE_GATCL1_STALL_MISSFIFO_FULL__GFX09 = 0x0000015f, - SQC_PERF_SEL_DCACHE_GATCL1_TRANSLATION_MISS__GFX09 = 0x00000160, - SQC_PERF_SEL_DCACHE_GATCL1_PERMISSION_MISS__GFX09 = 0x00000161, - SQC_PERF_SEL_DCACHE_GATCL1_TRANSLATION_HIT__GFX09 = 0x00000162, - SQC_PERF_SEL_DCACHE_GATCL1_REQUEST__GFX09 = 0x00000163, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_INFLIGHT_MAX__GFX09 = 0x00000164, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_LRU_INFLIGHT__GFX09 = 0x00000165, - SQC_PERF_SEL_DCACHE_GATCL1_LFIFO_FULL__GFX09 = 0x00000166, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_LFIFO_NOT_RES__GFX09 = 0x00000167, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS__GFX09 = 0x00000168, - SQC_PERF_SEL_DCACHE_GATCL1_ATCL2_INFLIGHT__GFX09 = 0x00000169, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_MISSFIFO_FULL__GFX09 = 0x0000016a, - SQC_PERF_SEL_DCACHE_GATCL1_STALL_MULTI_MISS__GFX09 = 0x0000016b, - SQC_PERF_SEL_DCACHE_GATCL1_HIT_FIFO_FULL__GFX09 = 0x0000016c, - SQC_PERF_SEL_ICACHE_UTCL1_INFLIGHT_LEVEL__GFX09 = 0x0000016d, - SQC_PERF_SEL_ICACHE_UTCL1_ALL_REQ__GFX09 = 0x0000016e, - SQC_PERF_SEL_ICACHE_UTCL2_INFLIGHT_LEVEL__GFX09 = 0x0000016f, - SQC_PERF_SEL_ICACHE_UTCL2_ALL_REQ__GFX09 = 0x00000170, - SQC_PERF_SEL_DCACHE_UTCL1_INFLIGHT_LEVEL__GFX09 = 0x00000171, - SQC_PERF_SEL_DCACHE_UTCL1_ALL_REQ__GFX09 = 0x00000172, - SQC_PERF_SEL_DCACHE_UTCL2_INFLIGHT_LEVEL__GFX09 = 0x00000173, - SQC_PERF_SEL_DCACHE_UTCL2_ALL_REQ__GFX09 = 0x00000174, - SQC_PERF_SEL_DUMMY_LAST__GFX09 = 0x00000175, - SQ_PERF_SEL_QUADS__GFX10 = 0x0000000b, - SQ_PERF_SEL_WAVES_EQ_64__GFX10 = 0x0000000d, - 
SQ_PERF_SEL_WAVES_LT_64__GFX10 = 0x0000000e, - SQ_PERF_SEL_WAVES_LT_48__GFX10 = 0x0000000f, - SQ_PERF_SEL_WAVES_LT_32__GFX10 = 0x00000010, - SQ_PERF_SEL_WAVES_LT_16__GFX10 = 0x00000011, - SQ_PERF_SEL_WAVES_RESTORED__GFX10 = 0x00000012, - SQ_PERF_SEL_WAVES_SAVED__GFX10 = 0x00000013, - SQ_PERF_SEL_MSG__GFX10 = 0x00000014, - SQ_PERF_SEL_MSG_GSCNT__GFX10 = 0x00000015, - SQ_PERF_SEL_Reserved_2__GFX10 = 0x00000018, - SQ_PERF_SEL_Reserved_3__GFX10 = 0x00000019, - SQ_PERF_SEL_WAVE_CYCLES__GFX10 = 0x0000001a, - SQ_PERF_SEL_WAVE_READY__GFX10 = 0x0000001b, - SQ_PERF_SEL_WAIT_INST_ANY__GFX10 = 0x0000001c, - SQ_PERF_SEL_WAIT_INST_VALU__GFX10 = 0x0000001d, - SQ_PERF_SEL_WAIT_INST_SCA__GFX10 = 0x0000001e, - SQ_PERF_SEL_WAIT_INST_LDS__GFX10 = 0x0000001f, - SQ_PERF_SEL_WAIT_INST_TEX__GFX10 = 0x00000020, - SQ_PERF_SEL_WAIT_INST_FLAT__GFX10 = 0x00000021, - SQ_PERF_SEL_WAIT_INST_VMEM__GFX10 = 0x00000022, - SQ_PERF_SEL_WAIT_INST_EXP_GDS__GFX10 = 0x00000023, - SQ_PERF_SEL_WAIT_INST_BR_MSG__GFX10 = 0x00000024, - SQ_PERF_SEL_WAIT_ANY__GFX10 = 0x00000025, - SQ_PERF_SEL_WAIT_CNT_ANY__GFX10 = 0x00000026, - SQ_PERF_SEL_WAIT_CNT_VMVS__GFX10 = 0x00000027, - SQ_PERF_SEL_WAIT_CNT_LGKM__GFX10 = 0x00000028, - SQ_PERF_SEL_WAIT_CNT_EXP__GFX10 = 0x00000029, - SQ_PERF_SEL_WAIT_TTRACE__GFX10 = 0x0000002a, - SQ_PERF_SEL_WAIT_IFETCH__GFX10 = 0x0000002b, - SQ_PERF_SEL_WAIT_BARRIER__GFX10 = 0x0000002c, - SQ_PERF_SEL_WAIT_EXP_ALLOC__GFX10 = 0x0000002d, - SQ_PERF_SEL_WAIT_SLEEP__GFX10 = 0x0000002e, - SQ_PERF_SEL_WAIT_OTHER__GFX10 = 0x00000030, - SQ_PERF_SEL_INSTS_ALL__GFX10 = 0x00000031, - SQ_PERF_SEL_INSTS_BRANCH__GFX10 = 0x00000032, - SQ_PERF_SEL_INSTS_CBRANCH_NOT_TAKEN__GFX10 = 0x00000033, - SQ_PERF_SEL_INSTS_CBRANCH_TAKEN__GFX10 = 0x00000034, - SQ_PERF_SEL_INSTS_CBRANCH_TAKEN_HIT_IS__GFX10 = 0x00000035, - SQ_PERF_SEL_INSTS_EXP_GDS__GFX10 = 0x00000036, - SQ_PERF_SEL_INSTS_GDS__GFX10 = 0x00000037, - SQ_PERF_SEL_INSTS_EXP__GFX10 = 0x00000038, - SQ_PERF_SEL_INSTS_FLAT__GFX10 = 0x00000039, - 
SQ_PERF_SEL_Reserved_4__GFX10 = 0x0000003a, - SQ_PERF_SEL_INSTS_LDS__GFX10 = 0x0000003b, - SQ_PERF_SEL_INSTS_SALU__GFX10 = 0x0000003c, - SQ_PERF_SEL_INSTS_SMEM__GFX10 = 0x0000003d, - SQ_PERF_SEL_INSTS_SMEM_NORM__GFX10 = 0x0000003e, - SQ_PERF_SEL_INSTS_SENDMSG__GFX10 = 0x0000003f, - SQ_PERF_SEL_INSTS_VALU__GFX10 = 0x00000040, - SQ_PERF_SEL_Reserved_17__GFX10 = 0x00000041, - SQ_PERF_SEL_INSTS_VALU_TRANS32__GFX10 = 0x00000042, - SQ_PERF_SEL_INSTS_VALU_NO_COEXEC__GFX10 = 0x00000043, - SQ_PERF_SEL_INSTS_TEX__GFX10 = 0x00000044, - SQ_PERF_SEL_INSTS_TEX_LOAD__GFX10 = 0x00000045, - SQ_PERF_SEL_INSTS_TEX_STORE__GFX10 = 0x00000046, - SQ_PERF_SEL_INSTS_WAVE32__GFX10 = 0x00000047, - SQ_PERF_SEL_INSTS_WAVE32_FLAT__GFX10 = 0x00000048, - SQ_PERF_SEL_Reserved_5__GFX10 = 0x00000049, - SQ_PERF_SEL_INSTS_WAVE32_LDS__GFX10 = 0x0000004a, - SQ_PERF_SEL_INSTS_WAVE32_VALU__GFX10 = 0x0000004b, - SQ_PERF_SEL_INSTS_WAVE32_VALU_TRANS32__GFX10 = 0x0000004d, - SQ_PERF_SEL_INSTS_WAVE32_VALU_NO_COEXEC__GFX10 = 0x0000004e, - SQ_PERF_SEL_INSTS_WAVE32_TEX__GFX10 = 0x0000004f, - SQ_PERF_SEL_INSTS_WAVE32_TEX_LOAD__GFX10 = 0x00000050, - SQ_PERF_SEL_INSTS_WAVE32_TEX_STORE__GFX10 = 0x00000051, - SQ_PERF_SEL_ITEM_CYCLES_VALU__GFX10 = 0x00000052, - SQ_PERF_SEL_VALU_READWRITELANE_CYCLES__GFX10 = 0x00000053, - SQ_PERF_SEL_WAVE32_INSTS__GFX10 = 0x00000054, - SQ_PERF_SEL_WAVE64_INSTS__GFX10 = 0x00000055, - SQ_PERF_SEL_Reserved_18__GFX10 = 0x00000056, - SQ_PERF_SEL_INSTS_VALU_EXEC_SKIPPED__GFX10 = 0x00000057, - SQ_PERF_SEL_WAVE64_HALF_SKIP__GFX10 = 0x00000058, - SQ_PERF_SEL_Reserved_20__GFX10 = 0x0000005b, - SQ_PERF_SEL_INST_LEVEL_EXP__GFX10 = 0x00000061, - SQ_PERF_SEL_INST_LEVEL_GDS__GFX10 = 0x00000062, - SQ_PERF_SEL_INST_LEVEL_LDS__GFX10 = 0x00000063, - SQ_PERF_SEL_INST_LEVEL_SMEM__GFX10 = 0x00000064, - SQ_PERF_SEL_INST_LEVEL_TEX_LOAD__GFX10 = 0x00000065, - SQ_PERF_SEL_INST_LEVEL_TEX_STORE__GFX10 = 0x00000066, - SQ_PERF_SEL_IFETCH_REQS__GFX10 = 0x00000067, - SQ_PERF_SEL_IFETCH_LEVEL__GFX10 = 0x00000068, - 
SQ_PERF_SEL_Reserved_6__GFX10 = 0x0000006a, - SQ_PERF_SEL_Reserved_7__GFX10 = 0x0000006b, - SQ_PERF_SEL_LDS_DIRECT_CMD_FIFO_FULL_STALL__GFX10 = 0x0000006c, - SQ_PERF_SEL_VALU_SGATHER_STALL__GFX10 = 0x0000006d, - SQ_PERF_SEL_VALU_FWD_BUFFER_FULL_STALL__GFX10 = 0x0000006e, - SQ_PERF_SEL_VALU_SGPR_RD_FIFO_FULL_STALL__GFX10 = 0x0000006f, - SQ_PERF_SEL_VALU_SGATHER_FULL_STALL__GFX10 = 0x00000070, - SQ_PERF_SEL_SALU_SGATHER_STALL__GFX10 = 0x00000071, - SQ_PERF_SEL_SALU_SGPR_RD_FIFO_FULL_STALL__GFX10 = 0x00000072, - SQ_PERF_SEL_SALU_GATHER_FULL_STALL__GFX10 = 0x00000073, - SQ_PERF_SEL_SMEM_DCACHE_FIFO_FULL_STALL__GFX10 = 0x00000074, - SQ_PERF_SEL_INST_CYCLES_VALU__GFX10 = 0x00000075, - SQ_PERF_SEL_INST_CYCLES_VALU_TRANS32__GFX10 = 0x00000076, - SQ_PERF_SEL_INST_CYCLES_VALU_NO_COEXEC__GFX10 = 0x00000077, - SQ_PERF_SEL_INST_CYCLES_VMEM__GFX10 = 0x00000078, - SQ_PERF_SEL_INST_CYCLES_VMEM_LOAD__GFX10 = 0x00000079, - SQ_PERF_SEL_INST_CYCLES_VMEM_STORE__GFX10 = 0x0000007a, - SQ_PERF_SEL_INST_CYCLES_LDS__GFX10 = 0x0000007b, - SQ_PERF_SEL_INST_CYCLES_TEX__GFX10 = 0x0000007c, - SQ_PERF_SEL_INST_CYCLES_FLAT__GFX10 = 0x0000007d, - SQ_PERF_SEL_INST_CYCLES_EXP_GDS__GFX10 = 0x0000007e, - SQ_PERF_SEL_INST_CYCLES_EXP__GFX10 = 0x0000007f, - SQ_PERF_SEL_INST_CYCLES_GDS__GFX10 = 0x00000080, - SQ_PERF_SEL_VALU_STARVE__GFX10 = 0x00000081, - SQ_PERF_SEL_VMEM_ARB_FIFO_FULL__GFX10 = 0x00000082, - SQ_PERF_SEL_MSG_FIFO_FULL_STALL__GFX10 = 0x00000083, - SQ_PERF_SEL_EXP_REQ_FIFO_FULL__GFX10 = 0x00000084, - SQ_PERF_SEL_Reserved_11__GFX10 = 0x00000085, - SQ_PERF_SEL_Reserved_12__GFX10 = 0x00000086, - SQ_PERF_SEL_Reserved_13__GFX10 = 0x00000087, - SQ_PERF_SEL_Reserved_14__GFX10 = 0x00000088, - SQ_PERF_SEL_VMEM_BUS_ACTIVE__GFX10 = 0x00000089, - SQ_PERF_SEL_VMEM_BUS_STALL__GFX10 = 0x0000008a, - SQ_PERF_SEL_VMEM_BUS_STALL_TA_ADDR_FIFO_FULL__GFX10 = 0x0000008b, - SQ_PERF_SEL_VMEM_BUS_STALL_TA_CMD_FIFO_FULL__GFX10 = 0x0000008c, - SQ_PERF_SEL_VMEM_BUS_STALL_LDS_ADDR_FIFO_FULL__GFX10 = 0x0000008d, - 
SQ_PERF_SEL_VMEM_BUS_STALL_LDS_CMD_FIFO_FULL__GFX10 = 0x0000008e, - SQ_PERF_SEL_VMEM_STARVE_TA_ADDR_EMPTY__GFX10 = 0x0000008f, - SQ_PERF_SEL_VMEM_STARVE_LDS_ADDR_EMPTY__GFX10 = 0x00000090, - SQ_PERF_SEL_Reserved_15__GFX10 = 0x00000091, - SQ_PERF_SEL_SALU_PIPE_STALL__GFX10 = 0x00000092, - SQ_PERF_SEL_SMEM_DCACHE_RETURN_CYCLES__GFX10 = 0x00000093, - SQ_PERF_SEL_Reserved_21__GFX10 = 0x00000094, - SQ_PERF_SEL_MSG_BUS_BUSY__GFX10 = 0x00000095, - SQ_PERF_SEL_EXP_REQ_BUS_STALL__GFX10 = 0x00000096, - SQ_PERF_SEL_EXP_REQ0_BUS_BUSY__GFX10 = 0x00000097, - SQ_PERF_SEL_EXP_REQ1_BUS_BUSY__GFX10 = 0x00000098, - SQ_PERF_SEL_EXP_BUS0_BUSY__GFX10 = 0x00000099, - SQ_PERF_SEL_EXP_BUS1_BUSY__GFX10 = 0x0000009a, - SQ_PERF_SEL_Reserved_19__GFX10 = 0x0000009b, - SQ_PERF_SEL_INST_CACHE_REQ_STALL__GFX10 = 0x0000009c, - SQ_PERF_SEL_USER0__GFX10 = 0x000000a0, - SQ_PERF_SEL_USER1__GFX10 = 0x000000a1, - SQ_PERF_SEL_USER2__GFX10 = 0x000000a2, - SQ_PERF_SEL_USER3__GFX10 = 0x000000a3, - SQ_PERF_SEL_USER4__GFX10 = 0x000000a4, - SQ_PERF_SEL_USER5__GFX10 = 0x000000a5, - SQ_PERF_SEL_USER6__GFX10 = 0x000000a6, - SQ_PERF_SEL_USER7__GFX10 = 0x000000a7, - SQ_PERF_SEL_USER8__GFX10 = 0x000000a8, - SQ_PERF_SEL_USER9__GFX10 = 0x000000a9, - SQ_PERF_SEL_USER10__GFX10 = 0x000000aa, - SQ_PERF_SEL_USER11__GFX10 = 0x000000ab, - SQ_PERF_SEL_USER12__GFX10 = 0x000000ac, - SQ_PERF_SEL_USER13__GFX10 = 0x000000ad, - SQ_PERF_SEL_USER14__GFX10 = 0x000000ae, - SQ_PERF_SEL_USER15__GFX10 = 0x000000af, - SQ_PERF_SEL_USER_LEVEL0__GFX10 = 0x000000b0, - SQ_PERF_SEL_USER_LEVEL1__GFX10 = 0x000000b1, - SQ_PERF_SEL_USER_LEVEL2__GFX10 = 0x000000b2, - SQ_PERF_SEL_USER_LEVEL3__GFX10 = 0x000000b3, - SQ_PERF_SEL_USER_LEVEL4__GFX10 = 0x000000b4, - SQ_PERF_SEL_USER_LEVEL5__GFX10 = 0x000000b5, - SQ_PERF_SEL_USER_LEVEL6__GFX10 = 0x000000b6, - SQ_PERF_SEL_USER_LEVEL7__GFX10 = 0x000000b7, - SQ_PERF_SEL_USER_LEVEL8__GFX10 = 0x000000b8, - SQ_PERF_SEL_USER_LEVEL9__GFX10 = 0x000000b9, - SQ_PERF_SEL_USER_LEVEL10__GFX10 = 0x000000ba, - 
SQ_PERF_SEL_USER_LEVEL11__GFX10 = 0x000000bb, - SQ_PERF_SEL_USER_LEVEL12__GFX10 = 0x000000bc, - SQ_PERF_SEL_USER_LEVEL13__GFX10 = 0x000000bd, - SQ_PERF_SEL_USER_LEVEL14__GFX10 = 0x000000be, - SQ_PERF_SEL_USER_LEVEL15__GFX10 = 0x000000bf, - SQ_PERF_SEL_VALU_RETURN_SDST__GFX10 = 0x000000c0, - SQG_PERF_SEL_UTCL0_TRANSLATION_MISS__GFX10 = 0x00000100, - SQG_PERF_SEL_UTCL0_PERMISSION_MISS__GFX10 = 0x00000101, - SQG_PERF_SEL_UTCL0_TRANSLATION_HIT__GFX10 = 0x00000102, - SQG_PERF_SEL_UTCL0_REQUEST__GFX10 = 0x00000103, - SQG_PERF_SEL_UTCL0_STALL_MISSFIFO_FULL__GFX10 = 0x00000104, - SQG_PERF_SEL_UTCL0_STALL_INFLIGHT_MAX__GFX10 = 0x00000105, - SQG_PERF_SEL_UTCL0_STALL_LRU_INFLIGHT__GFX10 = 0x00000106, - SQG_PERF_SEL_UTCL0_LFIFO_FULL__GFX10 = 0x00000107, - SQG_PERF_SEL_UTCL0_STALL_LFIFO_NOT_RES__GFX10 = 0x00000108, - SQG_PERF_SEL_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX10 = 0x00000109, - SQG_PERF_SEL_UTCL0_HIT_FIFO_FULL__GFX10 = 0x0000010a, - SQG_PERF_SEL_UTCL0_UTCL1_REQ__GFX10 = 0x0000010b, - SQG_PERF_SEL_TTRACE_REQS__GFX10 = 0x0000010e, - SQG_PERF_SEL_TTRACE_INFLIGHT_REQS__GFX10 = 0x0000010f, - SQG_PERF_SEL_TTRACE_STALL__GFX10 = 0x00000110, - SQG_PERF_SEL_TTRACE_LOST_PACKETS__GFX10 = 0x00000111, - SQG_PERF_SEL_DUMMY_LAST__GFX10 = 0x00000112, - SQC_PERF_SEL_LDS_BANK_CONFLICT__GFX10 = 0x0000011d, - SQC_PERF_SEL_LDS_ADDR_CONFLICT__GFX10 = 0x0000011e, - SQC_PERF_SEL_LDS_UNALIGNED_STALL__GFX10 = 0x0000011f, - SQC_PERF_SEL_LDS_MEM_VIOLATIONS__GFX10 = 0x00000120, - SQC_PERF_SEL_LDS_ATOMIC_RETURN__GFX10 = 0x00000121, - SQC_PERF_SEL_LDS_IDX_ACTIVE__GFX10 = 0x00000122, - SQC_PERF_SEL_LDS_ADDR_STALL__GFX10 = 0x00000123, - SQC_PERF_SEL_LDS_ADDR_ACTIVE__GFX10 = 0x00000124, - SQC_PERF_SEL_LDS_DIRECT_FIFO_FULL_STALL__GFX10 = 0x00000125, - SQC_PERF_SEL_LDS_PC_LDS_WRITE_STALL_TD__GFX10 = 0x00000126, - SQC_PERF_SEL_LDS_SPI_VGPR_WRITE_STALL_TD__GFX10 = 0x00000127, - SQC_PERF_SEL_LDS_LDS_VGPR_WRITE_STALL__GFX10 = 0x00000128, - SQC_PERF_SEL_LDS_FP_ADD_CYCLES__GFX10 = 0x00000129, - 
SQC_PERF_SEL_ICACHE_BUSY_CYCLES__GFX10 = 0x0000012a, - SQC_PERF_SEL_ICACHE_REQ__GFX10 = 0x0000012b, - SQC_PERF_SEL_ICACHE_HITS__GFX10 = 0x0000012c, - SQC_PERF_SEL_ICACHE_MISSES__GFX10 = 0x0000012d, - SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE__GFX10 = 0x0000012e, - SQC_PERF_SEL_ICACHE_INVAL_INST__GFX10 = 0x0000012f, - SQC_PERF_SEL_ICACHE_INVAL_ASYNC__GFX10 = 0x00000130, - SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL__GFX10 = 0x00000131, - SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL__GFX10 = 0x00000132, - SQC_PERF_SEL_TC_INFLIGHT_LEVEL__GFX10 = 0x00000133, - SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL__GFX10 = 0x00000134, - SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL__GFX10 = 0x00000135, - SQC_PERF_SEL_ICACHE_INPUT_VALID_READY__GFX10 = 0x00000136, - SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB__GFX10 = 0x00000137, - SQC_PERF_SEL_ICACHE_INPUT_VALIDB__GFX10 = 0x00000138, - SQC_PERF_SEL_DCACHE_INPUT_VALID_READY__GFX10 = 0x00000139, - SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB__GFX10 = 0x0000013a, - SQC_PERF_SEL_DCACHE_INPUT_VALIDB__GFX10 = 0x0000013b, - SQC_PERF_SEL_TC_REQ__GFX10 = 0x0000013c, - SQC_PERF_SEL_TC_INST_REQ__GFX10 = 0x0000013d, - SQC_PERF_SEL_TC_DATA_READ_REQ__GFX10 = 0x0000013e, - SQC_PERF_SEL_TC_STALL__GFX10 = 0x00000141, - SQC_PERF_SEL_TC_STARVE__GFX10 = 0x00000142, - SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT__GFX10 = 0x00000143, - SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB__GFX10 = 0x00000144, - SQC_PERF_SEL_ICACHE_CACHE_STALLED__GFX10 = 0x00000145, - SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_NONZERO__GFX10 = 0x00000146, - SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX__GFX10 = 0x00000147, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT__GFX10 = 0x00000148, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_MISS_FIFO__GFX10 = 0x00000149, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_HIT_FIFO__GFX10 = 0x0000014a, - SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_TC_IF__GFX10 = 0x0000014b, - SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX10 = 0x0000014c, - SQC_PERF_SEL_DCACHE_BUSY_CYCLES__GFX10 = 0x0000014d, - 
SQC_PERF_SEL_DCACHE_REQ__GFX10 = 0x0000014e, - SQC_PERF_SEL_DCACHE_HITS__GFX10 = 0x0000014f, - SQC_PERF_SEL_DCACHE_MISSES__GFX10 = 0x00000150, - SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE__GFX10 = 0x00000151, - SQC_PERF_SEL_DCACHE_INVAL_INST__GFX10 = 0x00000152, - SQC_PERF_SEL_DCACHE_INVAL_ASYNC__GFX10 = 0x00000153, - SQC_PERF_SEL_DCACHE_HIT_LRU_READ__GFX10 = 0x00000154, - SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT__GFX10 = 0x0000015a, - SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB__GFX10 = 0x0000015b, - SQC_PERF_SEL_DCACHE_CACHE_STALLED__GFX10 = 0x0000015c, - SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX__GFX10 = 0x0000015d, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT__GFX10 = 0x0000015e, - SQC_PERF_SEL_DCACHE_CACHE_STALL_EVICT__GFX10 = 0x0000015f, - SQC_PERF_SEL_DCACHE_CACHE_STALL_UNORDERED__GFX10 = 0x00000160, - SQC_PERF_SEL_DCACHE_CACHE_STALL_ALLOC_UNAVAILABLE__GFX10 = 0x00000161, - SQC_PERF_SEL_DCACHE_CACHE_STALL_FORCE_EVICT__GFX10 = 0x00000162, - SQC_PERF_SEL_DCACHE_CACHE_STALL_MULTI_FLUSH__GFX10 = 0x00000163, - SQC_PERF_SEL_DCACHE_CACHE_STALL_FLUSH_DONE__GFX10 = 0x00000164, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_MISS_FIFO__GFX10 = 0x00000165, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_HIT_FIFO__GFX10 = 0x00000166, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_TC_IF__GFX10 = 0x00000167, - SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX10 = 0x00000168, - SQC_PERF_SEL_DCACHE_REQ_READ_1__GFX10 = 0x00000169, - SQC_PERF_SEL_DCACHE_REQ_READ_2__GFX10 = 0x0000016a, - SQC_PERF_SEL_DCACHE_REQ_READ_4__GFX10 = 0x0000016b, - SQC_PERF_SEL_DCACHE_REQ_READ_8__GFX10 = 0x0000016c, - SQC_PERF_SEL_DCACHE_REQ_READ_16__GFX10 = 0x0000016d, - SQC_PERF_SEL_DCACHE_REQ_TIME__GFX10 = 0x0000016e, - SP_PERF_SEL_DCACHE_WB_CNT__GFX10 = 0x000001c0, - SP_PERF_SEL_DCACHE_STALL__GFX10 = 0x000001c1, - SP_PERF_SEL_DCACHE_HIT__GFX10 = 0x000001c2, - SP_PERF_SEL_DCACHE_HIT_DIRTY__GFX10 = 0x000001c3, - SP_PERF_SEL_DCACHE_WB_CONF_W_LDS__GFX10 = 0x000001c4, - SP_PERF_SEL_DCACHE_WB_CONF_W_TD__GFX10 = 0x000001c5, 
- SP_PERF_SEL_DCACHE_WB_CONF_W_SPI__GFX10 = 0x000001c6, - SP_PERF_SEL_DCACHE_WB_CONF_W_EXP_VMEM__GFX10 = 0x000001c7, - SP_PERF_SEL_DCACHE_EVEN_DIRTY_CNT__GFX10 = 0x000001c8, - SP_PERF_SEL_DCACHE_ODD_DIRTY_CNT__GFX10 = 0x000001c9, - SP_PERF_SEL_DCACHE_INVALIDATE_BY_VEC__GFX10 = 0x000001ca, - SP_PERF_SEL_GATHER_SRCA_FWD__GFX10 = 0x000001cb, - SP_PERF_SEL_GATHER_SRCB_FWD__GFX10 = 0x000001cc, - SP_PERF_SEL_GATHER_SRCC_FWD__GFX10 = 0x000001cd, - SP_PERF_SEL_GATHER_SRCA_REDUNDANT__GFX10 = 0x000001ce, - SP_PERF_SEL_GATHER_SRCB_REDUNDANT__GFX10 = 0x000001cf, - SP_PERF_SEL_GATHER_SRCC_REDUNDANT__GFX10 = 0x000001d0, - SP_PERF_SEL_GATHER_SRCA_DIRECT__GFX10 = 0x000001d1, - SP_PERF_SEL_GATHER_SRCB_DIRECT__GFX10 = 0x000001d2, - SP_PERF_SEL_GATHER_SRCC_DIRECT__GFX10 = 0x000001d3, - SP_PERF_SEL_VGPR_VALU_BANK0_RD__GFX10 = 0x000001d4, - SP_PERF_SEL_VGPR_VALU_BANK1_RD__GFX10 = 0x000001d5, - SP_PERF_SEL_VGPR_VALU_BANK2_RD__GFX10 = 0x000001d6, - SP_PERF_SEL_VGPR_VALU_BANK3_RD__GFX10 = 0x000001d7, - SP_PERF_SEL_VGPR_VMEM_BANK_RD__GFX10 = 0x000001d8, - SP_PERF_SEL_VGPR_EXP_BANK_RD__GFX10 = 0x000001d9, - SP_PERF_SEL_VGPR_TDDATA_WR__GFX10 = 0x000001da, - SP_PERF_SEL_VGPR_LDSDATA_WR__GFX10 = 0x000001db, - SP_PERF_SEL_PB_STALL__GFX10 = 0x000001dc, - SP_PERF_SEL_PB_SEND__GFX10 = 0x000001dd, - SP_PERF_SEL_LDS_DIRECT_FIFO_FULL__GFX10 = 0x000001de, - SP_PERF_SEL_LDS_DIRECT_FIFO_SEND__GFX10 = 0x000001df, - SP_PERF_SEL_SQ_SP_CONST_FIFO_FULL__GFX10 = 0x000001e0, - SP_PERF_SEL_SQ_SP_CONST_FIFO_SEND__GFX10 = 0x000001e1, - SP_PERF_SEL_SGPR_FULL__GFX10 = 0x000001e2, - SP_PERF_SEL_VALU_CONFLICT_WITH_EXP__GFX10 = 0x000001e3, - SP_PERF_SEL_VALU_CONFLICT_WITH_VMEM_LDS__GFX10 = 0x000001e4, - SP_PERF_SEL_VALU_CONFLICT_WITH_VMEM_NON_LDS__GFX10 = 0x000001e5, - SP_PERF_SEL_VALU_STALL_DUE_CONST_NOT_READY__GFX10 = 0x000001e6, - SP_PERF_SEL_VALU_STALL_DUE_VDST_FWD__GFX10 = 0x000001e7, - SP_PERF_SEL_VALU_STALL_DUE_SDST_FWD__GFX10 = 0x000001e8, - SP_PERF_SEL_VALU_STALL_DUE_LDS_DIR_NOT_READY__GFX10 = 0x000001e9, - 
SP_PERF_SEL_VALU_STALL_DUE_VGPR_NOT_READY__GFX10 = 0x000001ea, - SP_PERF_SEL_VALU_STALL_DUE_DST_STALL__GFX10 = 0x000001eb, - SP_PERF_SEL_VALU_STALL_DUE_DST_CACHE_WRITE_CONF__GFX10 = 0x000001ec, - SP_PERF_SEL_VALU_STALL__GFX10 = 0x000001ed, - SP_PERF_SEL_PERF_MEM_RD_CNT__GFX10 = 0x000001ee, - SP_PERF_SEL_PERF_MEM_WR_CNT__GFX10 = 0x000001ef, - SP_PERF_SEL_PERF_THREAD_REDUNDANT__GFX10 = 0x000001f0, - SP_PERF_SEL_PERF_OP_W_1_PASS__GFX10 = 0x000001f1, - SP_PERF_SEL_PERF_OP_W_2_PASS__GFX10 = 0x000001f2, - SP_PERF_SEL_PERF_OP_W_4_PASS__GFX10 = 0x000001f3, - SP_PERF_SEL_PERF_OP_W_16_PASS__GFX10 = 0x000001f4, - SP_PERF_SEL_PERF_COEXEC__GFX10 = 0x000001f5, - SP_PERF_SEL_PERF_ACTIVE_THREAD__GFX10 = 0x000001f6, - SP_PERF_SEL_PERF_ALL_ACTIVE__GFX10 = 0x000001f7, - SP_PERF_SEL_PERF_ZERO_P_ZERO__GFX10 = 0x000001f8, - SP_PERF_SEL_PERF_TRANS_OP__GFX10 = 0x000001f9, - SP_PERF_SEL_PERF_OP_W_MAD__GFX10 = 0x000001fa, - SP_PERF_SEL_PERF_OP_W_MUL__GFX10 = 0x000001fb, - SP_PERF_SEL_PERF_OP_W_ADD__GFX10 = 0x000001fc, - SQ_PERF_SEL_WAIT_SLEEP_XNACK__GFX101 = 0x0000002f, - SQ_PERF_SEL_INSTS_TEX_REPLAY__GFX101 = 0x00000059, - SQ_PERF_SEL_INSTS_SMEM_REPLAY__GFX101 = 0x0000005a, - SQ_PERF_SEL_INSTS_FLAT_REPLAY__GFX101 = 0x0000005c, - SQ_PERF_SEL_TA_XNACK_ALL__GFX101 = 0x0000005d, - SQ_PERF_SEL_TA_XNACK_FIRST__GFX101 = 0x0000005e, - SQ_PERF_SEL_IFETCH_XNACK__GFX101 = 0x00000069, - SQ_PERF_SEL_MIXED_SUBSEQUENT_ISSUES_VALU__GFX101 = 0x0000009d, - SQ_PERF_SEL_MIXED_SUBSEQUENT_ISSUES_SALU__GFX101 = 0x0000009e, - SQ_PERF_SEL_MIXED_SUBSEQUENT_ISSUES_VMEM__GFX101 = 0x0000009f, - SQ_PERF_SEL_VMEM_SECOND_TRY_USED__GFX101 = 0x000000c1, - SQ_PERF_SEL_VMEM_SECOND_TRY_STALL__GFX101 = 0x000000c2, - SQ_PERF_SEL_DUMMY_END__GFX101 = 0x000000c3, - SQG_PERF_SEL_TLB_SHOOTDOWN__GFX101 = 0x0000010c, - SQG_PERF_SEL_TLB_SHOOTDOWN_CYCLES__GFX101 = 0x0000010d, - SQC_PERF_SEL_POWER_VALU__GFX101 = 0x00000113, - SQC_PERF_SEL_POWER_VALU0__GFX101 = 0x00000114, - SQC_PERF_SEL_POWER_VALU1__GFX101 = 0x00000115, - 
SQC_PERF_SEL_POWER_VALU2__GFX101 = 0x00000116, - SQC_PERF_SEL_POWER_GPR_RD__GFX101 = 0x00000117, - SQC_PERF_SEL_POWER_GPR_WR__GFX101 = 0x00000118, - SQC_PERF_SEL_POWER_LDS_BUSY__GFX101 = 0x00000119, - SQC_PERF_SEL_POWER_ALU_BUSY__GFX101 = 0x0000011a, - SQC_PERF_SEL_POWER_TEX_BUSY__GFX101 = 0x0000011b, - SQC_PERF_SEL_PT_POWER_STALL__GFX101 = 0x0000011c, - SQC_PERF_SEL_TC_DATA_WRITE_REQ__GFX101 = 0x0000013f, - SQC_PERF_SEL_TC_DATA_ATOMIC_REQ__GFX101 = 0x00000140, - SQC_PERF_SEL_DCACHE_WC_LRU_WRITE__GFX101 = 0x00000155, - SQC_PERF_SEL_DCACHE_WT_EVICT_WRITE__GFX101 = 0x00000156, - SQC_PERF_SEL_DCACHE_ATOMIC__GFX101 = 0x00000157, - SQC_PERF_SEL_DCACHE_WB_INST__GFX101 = 0x00000158, - SQC_PERF_SEL_DCACHE_WB_ASYNC__GFX101 = 0x00000159, - SQC_PERF_SEL_DCACHE_REQ_WRITE_1__GFX101 = 0x0000016f, - SQC_PERF_SEL_DCACHE_REQ_WRITE_2__GFX101 = 0x00000170, - SQC_PERF_SEL_DCACHE_REQ_WRITE_4__GFX101 = 0x00000171, - SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE__GFX101 = 0x00000172, - SQC_PERF_SEL_SQ_DCACHE_REQS__GFX101 = 0x00000173, - SQC_PERF_SEL_DCACHE_FLAT_REQ__GFX101 = 0x00000174, - SQC_PERF_SEL_DCACHE_NONFLAT_REQ__GFX101 = 0x00000175, - SQC_PERF_SEL_ICACHE_UTCL0_TRANSLATION_MISS__GFX101 = 0x00000176, - SQC_PERF_SEL_ICACHE_UTCL0_PERMISSION_MISS__GFX101 = 0x00000177, - SQC_PERF_SEL_ICACHE_UTCL0_TRANSLATION_HIT__GFX101 = 0x00000178, - SQC_PERF_SEL_ICACHE_UTCL0_REQUEST__GFX101 = 0x00000179, - SQC_PERF_SEL_ICACHE_UTCL0_XNACK__GFX101 = 0x0000017a, - SQC_PERF_SEL_ICACHE_UTCL0_STALL_INFLIGHT_MAX__GFX101 = 0x0000017b, - SQC_PERF_SEL_ICACHE_UTCL0_STALL_LRU_INFLIGHT__GFX101 = 0x0000017c, - SQC_PERF_SEL_ICACHE_UTCL0_LFIFO_FULL__GFX101 = 0x0000017d, - SQC_PERF_SEL_ICACHE_UTCL0_STALL_LFIFO_NOT_RES__GFX101 = 0x0000017e, - SQC_PERF_SEL_ICACHE_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX101 = 0x0000017f, - SQC_PERF_SEL_ICACHE_UTCL0_UTCL1_INFLIGHT__GFX101 = 0x00000180, - SQC_PERF_SEL_ICACHE_UTCL0_STALL_MISSFIFO_FULL__GFX101 = 0x00000181, - SQC_PERF_SEL_DCACHE_UTCL0_TRANSLATION_MISS__GFX101 = 0x00000182, - 
SQC_PERF_SEL_DCACHE_UTCL0_PERMISSION_MISS__GFX101 = 0x00000183, - SQC_PERF_SEL_DCACHE_UTCL0_TRANSLATION_HIT__GFX101 = 0x00000184, - SQC_PERF_SEL_DCACHE_UTCL0_REQUEST__GFX101 = 0x00000185, - SQC_PERF_SEL_DCACHE_UTCL0_XNACK__GFX101 = 0x00000186, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_INFLIGHT_MAX__GFX101 = 0x00000187, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_LRU_INFLIGHT__GFX101 = 0x00000188, - SQC_PERF_SEL_DCACHE_UTCL0_LFIFO_FULL__GFX101 = 0x00000189, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_LFIFO_NOT_RES__GFX101 = 0x0000018a, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX101 = 0x0000018b, - SQC_PERF_SEL_DCACHE_UTCL0_UTCL1_INFLIGHT__GFX101 = 0x0000018c, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_MISSFIFO_FULL__GFX101 = 0x0000018d, - SQC_PERF_SEL_DCACHE_UTCL0_STALL_MULTI_MISS__GFX101 = 0x0000018e, - SQC_PERF_SEL_DCACHE_UTCL0_HIT_FIFO_FULL__GFX101 = 0x0000018f, - SQC_PERF_SEL_ICACHE_UTCL0_INFLIGHT_LEVEL__GFX101 = 0x00000190, - SQC_PERF_SEL_ICACHE_UTCL0_ALL_REQ__GFX101 = 0x00000191, - SQC_PERF_SEL_ICACHE_UTCL1_INFLIGHT_LEVEL__GFX101 = 0x00000192, - SQC_PERF_SEL_ICACHE_UTCL1_ALL_REQ__GFX101 = 0x00000193, - SQC_PERF_SEL_ICACHE_UTCL0_UTCL1_PERM_FAULT__GFX101 = 0x00000194, - SQC_PERF_SEL_DCACHE_UTCL0_INFLIGHT_LEVEL__GFX101 = 0x00000195, - SQC_PERF_SEL_DCACHE_UTCL0_ALL_REQ__GFX101 = 0x00000196, - SQC_PERF_SEL_DCACHE_UTCL1_INFLIGHT_LEVEL__GFX101 = 0x00000197, - SQC_PERF_SEL_DCACHE_UTCL1_ALL_REQ__GFX101 = 0x00000198, - SQC_PERF_SEL_DCACHE_UTCL0_UTCL1_PERM_FAULT__GFX101 = 0x00000199, - SQC_PERF_SEL_ICACHE_GCR__GFX101 = 0x0000019a, - SQC_PERF_SEL_ICACHE_GCR_HITS__GFX101 = 0x0000019b, - SQC_PERF_SEL_DCACHE_GCR__GFX101 = 0x0000019c, - SQC_PERF_SEL_DCACHE_GCR_HITS__GFX101 = 0x0000019d, - SQC_PERF_SEL_ICACHE_GCR_INVALIDATE__GFX101 = 0x0000019e, - SQC_PERF_SEL_DCACHE_GCR_INVALIDATE__GFX101 = 0x0000019f, - SQC_PERF_SEL_DCACHE_GCR_WRITEBACK__GFX101 = 0x000001a0, - SQC_PERF_SEL_DCACHE_SPI_RETURN_STALL__GFX101 = 0x000001a1, - SQC_PERF_SEL_DUMMY_LAST__GFX101 = 0x000001a2, - 
SQ_PERF_SEL_DUMMY_END__GFX103 = 0x000000c3, - SQC_PERF_SEL_Reserved_0x176__GFX103 = 0x00000177, - SQC_PERF_SEL_Reserved_0x177__GFX103 = 0x00000178, - SQC_PERF_SEL_Reserved_0x178__GFX103 = 0x00000179, - SQ_PERF_SEL_Reserved_0x02f__GFX103DERIVATIVE = 0x0000002f, - SQ_PERF_SEL_Reserved_0x059__GFX103DERIVATIVE = 0x00000059, - SQ_PERF_SEL_Reserved_0x05a__GFX103DERIVATIVE = 0x0000005a, - SQ_PERF_SEL_Reserved_0x05c__GFX103DERIVATIVE = 0x0000005c, - SQ_PERF_SEL_Reserved_0x05d__GFX103DERIVATIVE = 0x0000005d, - SQ_PERF_SEL_Reserved_0x05e__GFX103DERIVATIVE = 0x0000005e, - SQ_PERF_SEL_Reserved_0x069__GFX103DERIVATIVE = 0x00000069, - SQ_PERF_SEL_Reserved_22__GFX103DERIVATIVE = 0x0000009d, - SQ_PERF_SEL_Reserved_23__GFX103DERIVATIVE = 0x0000009e, - SQ_PERF_SEL_Reserved_24__GFX103DERIVATIVE = 0x0000009f, - SQ_PERF_SEL_VMEM_VGPR_READ_STALLED_BY_EXPORT__GFX103DERIVATIVE = 0x000000c1, - SQ_PERF_SEL_INSTS_VALU_TRANS__GFX103DERIVATIVE = 0x000000c2, - SQG_PERF_SEL_Reserved_0__GFX103DERIVATIVE = 0x0000010c, - SQG_PERF_SEL_TTRACE_LOST_PACKETS_NO_CH_CREDITS__GFX103DERIVATIVE = 0x0000010d, - SQC_PERF_SEL_POWER_Reserved_0__GFX103DERIVATIVE = 0x00000113, - SQC_PERF_SEL_POWER_Reserved_1__GFX103DERIVATIVE = 0x00000114, - SQC_PERF_SEL_POWER_Reserved_2__GFX103DERIVATIVE = 0x00000115, - SQC_PERF_SEL_POWER_Reserved_3__GFX103DERIVATIVE = 0x00000116, - SQC_PERF_SEL_POWER_Reserved_4__GFX103DERIVATIVE = 0x00000117, - SQC_PERF_SEL_POWER_Reserved_5__GFX103DERIVATIVE = 0x00000118, - SQC_PERF_SEL_POWER_Reserved_6__GFX103DERIVATIVE = 0x00000119, - SQC_PERF_SEL_POWER_Reserved_7__GFX103DERIVATIVE = 0x0000011a, - SQC_PERF_SEL_POWER_Reserved_8__GFX103DERIVATIVE = 0x0000011b, - SQC_PERF_SEL_POWER_Reserved_9__GFX103DERIVATIVE = 0x0000011c, - SQC_PERF_SEL_Reserved_0x13f__GFX103DERIVATIVE = 0x0000013f, - SQC_PERF_SEL_Reserved_0x140__GFX103DERIVATIVE = 0x00000140, - SQC_PERF_SEL_Reserved_0x155__GFX103DERIVATIVE = 0x00000155, - SQC_PERF_SEL_Reserved_0x156__GFX103DERIVATIVE = 0x00000156, - 
SQC_PERF_SEL_Reserved_0x157__GFX103DERIVATIVE = 0x00000157, - SQC_PERF_SEL_Reserved_0x158__GFX103DERIVATIVE = 0x00000158, - SQC_PERF_SEL_Reserved_0x159__GFX103DERIVATIVE = 0x00000159, - SQC_PERF_SEL_Reserved_0x16e__GFX103DERIVATIVE = 0x0000016f, - SQC_PERF_SEL_Reserved_0x16f__GFX103DERIVATIVE = 0x00000170, - SQC_PERF_SEL_Reserved_0x170__GFX103DERIVATIVE = 0x00000171, - SQC_PERF_SEL_Reserved_0x171__GFX103DERIVATIVE = 0x00000172, - SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE__GFX103DERIVATIVE = 0x00000173, - SQC_PERF_SEL_SQ_DCACHE_REQS__GFX103DERIVATIVE = 0x00000174, - SQC_PERF_SEL_DCACHE_FLAT_REQ__GFX103DERIVATIVE = 0x00000175, - SQC_PERF_SEL_DCACHE_NONFLAT_REQ__GFX103DERIVATIVE = 0x00000176, - SQC_PERF_SEL_Reserved_0x179__GFX103DERIVATIVE = 0x0000017a, - SQC_PERF_SEL_Reserved_0x17a__GFX103DERIVATIVE = 0x0000017b, - SQC_PERF_SEL_Reserved_0x17b__GFX103DERIVATIVE = 0x0000017c, - SQC_PERF_SEL_Reserved_0x17c__GFX103DERIVATIVE = 0x0000017d, - SQC_PERF_SEL_Reserved_0x17d__GFX103DERIVATIVE = 0x0000017e, - SQC_PERF_SEL_Reserved_0x17e__GFX103DERIVATIVE = 0x0000017f, - SQC_PERF_SEL_Reserved_0x17f__GFX103DERIVATIVE = 0x00000180, - SQC_PERF_SEL_Reserved_0x180__GFX103DERIVATIVE = 0x00000181, - SQC_PERF_SEL_Reserved_0x181__GFX103DERIVATIVE = 0x00000182, - SQC_PERF_SEL_Reserved_0x182__GFX103DERIVATIVE = 0x00000183, - SQC_PERF_SEL_Reserved_0x183__GFX103DERIVATIVE = 0x00000184, - SQC_PERF_SEL_Reserved_0x184__GFX103DERIVATIVE = 0x00000185, - SQC_PERF_SEL_Reserved_0x185__GFX103DERIVATIVE = 0x00000186, - SQC_PERF_SEL_Reserved_0x186__GFX103DERIVATIVE = 0x00000187, - SQC_PERF_SEL_Reserved_0x187__GFX103DERIVATIVE = 0x00000188, - SQC_PERF_SEL_Reserved_0x188__GFX103DERIVATIVE = 0x00000189, - SQC_PERF_SEL_Reserved_0x189__GFX103DERIVATIVE = 0x0000018a, - SQC_PERF_SEL_Reserved_0x18a__GFX103DERIVATIVE = 0x0000018b, - SQC_PERF_SEL_Reserved_0x18b__GFX103DERIVATIVE = 0x0000018c, - SQC_PERF_SEL_Reserved_0x18c__GFX103DERIVATIVE = 0x0000018d, - SQC_PERF_SEL_Reserved_0x18d__GFX103DERIVATIVE = 0x0000018e, - 
SQC_PERF_SEL_Reserved_0x18e__GFX103DERIVATIVE = 0x0000018f, - SQC_PERF_SEL_Reserved_0x18f__GFX103DERIVATIVE = 0x00000190, - SQC_PERF_SEL_Reserved_0x190__GFX103DERIVATIVE = 0x00000191, - SQC_PERF_SEL_Reserved_0x191__GFX103DERIVATIVE = 0x00000192, - SQC_PERF_SEL_Reserved_0x192__GFX103DERIVATIVE = 0x00000193, - SQC_PERF_SEL_Reserved_0x193__GFX103DERIVATIVE = 0x00000194, - SQC_PERF_SEL_Reserved_0x194__GFX103DERIVATIVE = 0x00000195, - SQC_PERF_SEL_Reserved_0x195__GFX103DERIVATIVE = 0x00000196, - SQC_PERF_SEL_Reserved_0x196__GFX103DERIVATIVE = 0x00000197, - SQC_PERF_SEL_Reserved_0x197__GFX103DERIVATIVE = 0x00000198, - SQC_PERF_SEL_Reserved_0x198__GFX103DERIVATIVE = 0x00000199, - SQC_PERF_SEL_Reserved_0x199__GFX103DERIVATIVE = 0x0000019a, - SQC_PERF_SEL_ICACHE_GCR__GFX103DERIVATIVE = 0x0000019b, - SQC_PERF_SEL_ICACHE_GCR_HITS__GFX103DERIVATIVE = 0x0000019c, - SQC_PERF_SEL_DCACHE_GCR__GFX103DERIVATIVE = 0x0000019d, - SQC_PERF_SEL_DCACHE_GCR_HITS__GFX103DERIVATIVE = 0x0000019e, - SQC_PERF_SEL_ICACHE_GCR_INVALIDATE__GFX103DERIVATIVE = 0x0000019f, - SQC_PERF_SEL_DCACHE_GCR_INVALIDATE__GFX103DERIVATIVE = 0x000001a0, - SQC_PERF_SEL_Reserved_0x1a0__GFX103DERIVATIVE = 0x000001a1, - SQC_PERF_SEL_DCACHE_SPI_RETURN_STALL__GFX103DERIVATIVE = 0x000001a2, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_PERF_SEL_WAVES_INITIAL_PREFETCH__GFX104PLUS = 0x00000017, - SQ_PERF_SEL_NONE2__GFX104PLUS = 0x000001ff, -#endif - SQ_PERF_SEL_Reserved_1__GFX10CORE = 0x00000017, - SQ_PERF_SEL_INSTS_WAVE32_EXP_GDS__GFX10CORE = 0x0000004c, - SQ_PERF_SEL_INSTS_VALU_LDS_DIRECT_RD__GFX10CORE = 0x0000005f, - SQ_PERF_SEL_INSTS_VALU_VINTRP_OP__GFX10CORE = 0x00000060, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE = 0x000001ff, - SQ_PERF_SEL_WAVES_32__GFX10PLUS = 0x00000005, - SQ_PERF_SEL_WAVES_64__GFX10PLUS = 0x00000006, - SQ_PERF_SEL_LEVEL_WAVES__GFX10PLUS = 0x00000007, - SQ_PERF_SEL_ITEMS__GFX10PLUS = 0x00000008, - SQ_PERF_SEL_WAVE32_ITEMS__GFX10PLUS = 0x00000009, - 
SQ_PERF_SEL_WAVE64_ITEMS__GFX10PLUS = 0x0000000a, - SQ_PERF_SEL_EVENTS__GFX10PLUS = 0x0000000c, - SQ_PERF_SEL_MSG_INTERRUPT__GFX10PLUS = 0x00000016, - SQC_PERF_SEL_DUMMY_LAST__GFX10VRS = 0x000001a3, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_PERF_SEL_PS_QUADS__GFX11 = 0x0000000b, - SQ_PERF_SEL_WAVES_EQ_32__GFX11 = 0x0000000d, - SQ_PERF_SEL_WAVES_EQ_64__GFX11 = 0x0000000e, - SQ_PERF_SEL_WAVES_LT_64__GFX11 = 0x0000000f, - SQ_PERF_SEL_WAVES_LT_48__GFX11 = 0x00000010, - SQ_PERF_SEL_WAVES_LT_32__GFX11 = 0x00000011, - SQ_PERF_SEL_WAVES_LT_16__GFX11 = 0x00000012, - SQ_PERF_SEL_WAVES_RESTORED__GFX11 = 0x00000013, - SQ_PERF_SEL_WAVES_SAVED__GFX11 = 0x00000014, - SQ_PERF_SEL_MSG__GFX11 = 0x00000015, - SQ_PERF_SEL_WAVE_CYCLES__GFX11 = 0x00000018, - SQ_PERF_SEL_WAVE_READY__GFX11 = 0x00000019, - SQ_PERF_SEL_WAIT_INST_ANY__GFX11 = 0x0000001a, - SQ_PERF_SEL_WAIT_INST_VALU__GFX11 = 0x0000001b, - SQ_PERF_SEL_WAIT_INST_SCA__GFX11 = 0x0000001c, - SQ_PERF_SEL_WAIT_INST_LDS__GFX11 = 0x0000001d, - SQ_PERF_SEL_WAIT_INST_TEX__GFX11 = 0x0000001e, - SQ_PERF_SEL_WAIT_INST_FLAT__GFX11 = 0x0000001f, - SQ_PERF_SEL_WAIT_INST_VMEM__GFX11 = 0x00000020, - SQ_PERF_SEL_WAIT_INST_EXP_GDS__GFX11 = 0x00000021, - SQ_PERF_SEL_WAIT_INST_BR_MSG__GFX11 = 0x00000022, - SQ_PERF_SEL_WAIT_ANY__GFX11 = 0x00000023, - SQ_PERF_SEL_WAIT_CNT_ANY__GFX11 = 0x00000024, - SQ_PERF_SEL_WAIT_CNT_VMVS__GFX11 = 0x00000025, - SQ_PERF_SEL_WAIT_CNT_LGKM__GFX11 = 0x00000026, - SQ_PERF_SEL_WAIT_CNT_EXP__GFX11 = 0x00000027, - SQ_PERF_SEL_WAIT_TTRACE__GFX11 = 0x00000028, - SQ_PERF_SEL_WAIT_IFETCH__GFX11 = 0x00000029, - SQ_PERF_SEL_WAIT_BARRIER__GFX11 = 0x0000002a, - SQ_PERF_SEL_WAIT_EXP_ALLOC__GFX11 = 0x0000002b, - SQ_PERF_SEL_WAIT_SLEEP__GFX11 = 0x0000002c, - SQ_PERF_SEL_WAIT_DELAY_ALU__GFX11 = 0x0000002d, - SQ_PERF_SEL_WAIT_DEPCTR__GFX11 = 0x0000002e, - SQ_PERF_SEL_WAIT_OTHER__GFX11 = 0x0000002f, - SQ_PERF_SEL_INSTS_ALL__GFX11 = 0x00000030, - SQ_PERF_SEL_INSTS_BRANCH__GFX11 = 0x00000031, - 
SQ_PERF_SEL_INSTS_CBRANCH_NOT_TAKEN__GFX11 = 0x00000032, - SQ_PERF_SEL_INSTS_CBRANCH_TAKEN__GFX11 = 0x00000033, - SQ_PERF_SEL_INSTS_CBRANCH_TAKEN_HIT_IS__GFX11 = 0x00000034, - SQ_PERF_SEL_INSTS_EXP_GDS__GFX11 = 0x00000035, - SQ_PERF_SEL_INSTS_GDS__GFX11 = 0x00000036, - SQ_PERF_SEL_INSTS_EXP__GFX11 = 0x00000037, - SQ_PERF_SEL_INSTS_FLAT__GFX11 = 0x00000038, - SQ_PERF_SEL_INSTS_LDS__GFX11 = 0x00000039, - SQ_PERF_SEL_INSTS_SALU__GFX11 = 0x0000003a, - SQ_PERF_SEL_INSTS_SMEM__GFX11 = 0x0000003b, - SQ_PERF_SEL_INSTS_SMEM_NORM__GFX11 = 0x0000003c, - SQ_PERF_SEL_INSTS_SENDMSG__GFX11 = 0x0000003d, - SQ_PERF_SEL_INSTS_VALU__GFX11 = 0x0000003e, - SQ_PERF_SEL_INSTS_VALU_TRANS32__GFX11 = 0x0000003f, - SQ_PERF_SEL_INSTS_VALU_NO_COEXEC__GFX11 = 0x00000040, - SQ_PERF_SEL_INSTS_TEX__GFX11 = 0x00000041, - SQ_PERF_SEL_INSTS_TEX_LOAD__GFX11 = 0x00000042, - SQ_PERF_SEL_INSTS_TEX_STORE__GFX11 = 0x00000043, - SQ_PERF_SEL_INSTS_DELAY_ALU__GFX11 = 0x00000044, - SQ_PERF_SEL_INSTS_INTERNAL__GFX11 = 0x00000045, - SQ_PERF_SEL_INSTS_WAVE32__GFX11 = 0x00000046, - SQ_PERF_SEL_INSTS_WAVE32_FLAT__GFX11 = 0x00000047, - SQ_PERF_SEL_INSTS_WAVE32_LDS__GFX11 = 0x00000048, - SQ_PERF_SEL_INSTS_WAVE32_VALU__GFX11 = 0x00000049, - SQ_PERF_SEL_WAVE32_INSTS_EXP_GDS__GFX11 = 0x0000004a, - SQ_PERF_SEL_INSTS_WAVE32_VALU_TRANS32__GFX11 = 0x0000004b, - SQ_PERF_SEL_INSTS_WAVE32_VALU_NO_COEXEC__GFX11 = 0x0000004c, - SQ_PERF_SEL_INSTS_WAVE32_TEX__GFX11 = 0x0000004d, - SQ_PERF_SEL_INSTS_WAVE32_TEX_LOAD__GFX11 = 0x0000004e, - SQ_PERF_SEL_INSTS_WAVE32_TEX_STORE__GFX11 = 0x0000004f, - SQ_PERF_SEL_ITEM_CYCLES_VALU__GFX11 = 0x00000050, - SQ_PERF_SEL_VALU_READWRITELANE_CYCLES__GFX11 = 0x00000051, - SQ_PERF_SEL_WAVE32_INSTS__GFX11 = 0x00000052, - SQ_PERF_SEL_WAVE64_INSTS__GFX11 = 0x00000053, - SQ_PERF_SEL_INSTS_VALU_EXEC_SKIPPED__GFX11 = 0x00000054, - SQ_PERF_SEL_WAVE64_HALF_SKIP__GFX11 = 0x00000055, - SQ_PERF_SEL_INST_LEVEL_EXP__GFX11 = 0x00000056, - SQ_PERF_SEL_INST_LEVEL_GDS__GFX11 = 0x00000057, - 
SQ_PERF_SEL_INST_LEVEL_LDS__GFX11 = 0x00000058, - SQ_PERF_SEL_INST_LEVEL_SMEM__GFX11 = 0x00000059, - SQ_PERF_SEL_INST_LEVEL_TEX_LOAD__GFX11 = 0x0000005a, - SQ_PERF_SEL_INST_LEVEL_TEX_STORE__GFX11 = 0x0000005b, - SQ_PERF_SEL_IFETCH_REQS__GFX11 = 0x0000005c, - SQ_PERF_SEL_IFETCH_LEVEL__GFX11 = 0x0000005d, - SQ_PERF_SEL_LDS_DIRECT_CMD_FIFO_FULL_STALL__GFX11 = 0x0000005e, - SQ_PERF_SEL_VALU_SGATHER_STALL__GFX11 = 0x0000005f, - SQ_PERF_SEL_VALU_FWD_BUFFER_FULL_STALL__GFX11 = 0x00000060, - SQ_PERF_SEL_VALU_SGPR_RD_FIFO_FULL_STALL__GFX11 = 0x00000061, - SQ_PERF_SEL_VALU_SGATHER_FULL_STALL__GFX11 = 0x00000062, - SQ_PERF_SEL_SALU_SGATHER_STALL__GFX11 = 0x00000063, - SQ_PERF_SEL_SALU_SGPR_RD_FIFO_FULL_STALL__GFX11 = 0x00000064, - SQ_PERF_SEL_SALU_GATHER_FULL_STALL__GFX11 = 0x00000065, - SQ_PERF_SEL_SMEM_DCACHE_FIFO_FULL_STALL__GFX11 = 0x00000066, - SQ_PERF_SEL_INST_CYCLES_VALU__GFX11 = 0x00000067, - SQ_PERF_SEL_INST_CYCLES_VALU_TRANS32__GFX11 = 0x00000068, - SQ_PERF_SEL_INST_CYCLES_VALU_NO_COEXEC__GFX11 = 0x00000069, - SQ_PERF_SEL_INST_CYCLES_VMEM__GFX11 = 0x0000006a, - SQ_PERF_SEL_INST_CYCLES_VMEM_LOAD__GFX11 = 0x0000006b, - SQ_PERF_SEL_INST_CYCLES_VMEM_STORE__GFX11 = 0x0000006c, - SQ_PERF_SEL_INST_CYCLES_LDS__GFX11 = 0x0000006d, - SQ_PERF_SEL_INST_CYCLES_TEX__GFX11 = 0x0000006e, - SQ_PERF_SEL_INST_CYCLES_FLAT__GFX11 = 0x0000006f, - SQ_PERF_SEL_INST_CYCLES_EXP_GDS__GFX11 = 0x00000070, - SQ_PERF_SEL_INST_CYCLES_EXP__GFX11 = 0x00000071, - SQ_PERF_SEL_INST_CYCLES_GDS__GFX11 = 0x00000072, - SQ_PERF_SEL_VALU_STARVE__GFX11 = 0x00000073, - SQ_PERF_SEL_VMEM_ARB_FIFO_FULL__GFX11 = 0x00000074, - SQ_PERF_SEL_MSG_FIFO_FULL_STALL__GFX11 = 0x00000075, - SQ_PERF_SEL_EXP_REQ_FIFO_FULL__GFX11 = 0x00000076, - SQ_PERF_SEL_VMEM_BUS_ACTIVE__GFX11 = 0x00000077, - SQ_PERF_SEL_VMEM_BUS_STALL__GFX11 = 0x00000078, - SQ_PERF_SEL_VMEM_BUS_STALL_TA_ADDR_FIFO_FULL__GFX11 = 0x00000079, - SQ_PERF_SEL_VMEM_BUS_STALL_TA_CMD_FIFO_FULL__GFX11 = 0x0000007a, - 
SQ_PERF_SEL_VMEM_BUS_STALL_LDS_ADDR_FIFO_FULL__GFX11 = 0x0000007b, - SQ_PERF_SEL_VMEM_BUS_STALL_LDS_CMD_FIFO_FULL__GFX11 = 0x0000007c, - SQ_PERF_SEL_VMEM_STARVE_TA_ADDR_EMPTY__GFX11 = 0x0000007d, - SQ_PERF_SEL_VMEM_STARVE_LDS_ADDR_EMPTY__GFX11 = 0x0000007e, - SQ_PERF_SEL_SALU_PIPE_STALL__GFX11 = 0x0000007f, - SQ_PERF_SEL_SMEM_DCACHE_RETURN_CYCLES__GFX11 = 0x00000080, - SQ_PERF_SEL_MSG_BUS_BUSY__GFX11 = 0x00000081, - SQ_PERF_SEL_EXP_REQ_BUS_STALL__GFX11 = 0x00000082, - SQ_PERF_SEL_EXP_REQ0_BUS_BUSY__GFX11 = 0x00000083, - SQ_PERF_SEL_EXP_REQ1_BUS_BUSY__GFX11 = 0x00000084, - SQ_PERF_SEL_EXP_BUS0_BUSY__GFX11 = 0x00000085, - SQ_PERF_SEL_EXP_BUS1_BUSY__GFX11 = 0x00000086, - SQ_PERF_SEL_INST_CACHE_REQ_STALL__GFX11 = 0x00000087, - SQ_PERF_SEL_USER0__GFX11 = 0x00000088, - SQ_PERF_SEL_USER1__GFX11 = 0x00000089, - SQ_PERF_SEL_USER2__GFX11 = 0x0000008a, - SQ_PERF_SEL_USER3__GFX11 = 0x0000008b, - SQ_PERF_SEL_USER4__GFX11 = 0x0000008c, - SQ_PERF_SEL_USER5__GFX11 = 0x0000008d, - SQ_PERF_SEL_USER6__GFX11 = 0x0000008e, - SQ_PERF_SEL_USER7__GFX11 = 0x0000008f, - SQ_PERF_SEL_USER8__GFX11 = 0x00000090, - SQ_PERF_SEL_USER9__GFX11 = 0x00000091, - SQ_PERF_SEL_USER10__GFX11 = 0x00000092, - SQ_PERF_SEL_USER11__GFX11 = 0x00000093, - SQ_PERF_SEL_USER12__GFX11 = 0x00000094, - SQ_PERF_SEL_USER13__GFX11 = 0x00000095, - SQ_PERF_SEL_USER14__GFX11 = 0x00000096, - SQ_PERF_SEL_USER15__GFX11 = 0x00000097, - SQ_PERF_SEL_USER_LEVEL0__GFX11 = 0x00000098, - SQ_PERF_SEL_USER_LEVEL1__GFX11 = 0x00000099, - SQ_PERF_SEL_USER_LEVEL2__GFX11 = 0x0000009a, - SQ_PERF_SEL_USER_LEVEL3__GFX11 = 0x0000009b, - SQ_PERF_SEL_USER_LEVEL4__GFX11 = 0x0000009c, - SQ_PERF_SEL_USER_LEVEL5__GFX11 = 0x0000009d, - SQ_PERF_SEL_USER_LEVEL6__GFX11 = 0x0000009e, - SQ_PERF_SEL_USER_LEVEL7__GFX11 = 0x0000009f, - SQ_PERF_SEL_USER_LEVEL8__GFX11 = 0x000000a0, - SQ_PERF_SEL_USER_LEVEL9__GFX11 = 0x000000a1, - SQ_PERF_SEL_USER_LEVEL10__GFX11 = 0x000000a2, - SQ_PERF_SEL_USER_LEVEL11__GFX11 = 0x000000a3, - SQ_PERF_SEL_USER_LEVEL12__GFX11 = 
0x000000a4, - SQ_PERF_SEL_USER_LEVEL13__GFX11 = 0x000000a5, - SQ_PERF_SEL_USER_LEVEL14__GFX11 = 0x000000a6, - SQ_PERF_SEL_USER_LEVEL15__GFX11 = 0x000000a7, - SQ_PERF_SEL_VALU_RETURN_SDST__GFX11 = 0x000000a8, - SQ_PERF_SEL_VMEM_VGPR_READ_STALLED_BY_EXPORT__GFX11 = 0x000000a9, - SQ_PERF_SEL_INSTS_VALU_TRANS__GFX11 = 0x000000aa, - SQ_PERF_SEL_INSTS_LDS_DIRECT_LOAD__GFX11 = 0x000000ab, - SQ_PERF_SEL_INSTS_LDS_PARAM_LOAD__GFX11 = 0x000000ac, - SQ_PERF_SEL_INSTS_WAVE32_LDS_PARAM_LOAD__GFX11 = 0x000000ad, - SQ_PERF_SEL_INSTS_VALU_ONE_CYCLE_WAVE64__GFX11 = 0x000000ae, - SQ_PERF_SEL_INSTS_VALU_VINTERP__GFX11 = 0x000000af, - SQ_PERF_SEL_INSTS_VALU_WAVE32_VINTERP__GFX11 = 0x000000b0, - SQ_PERF_SEL_OVERFLOW_PREV__GFX11 = 0x000000b1, - SQ_PERF_SEL_INSTS_DUAL_VALU_WAVE32__GFX11 = 0x000000b2, - SQ_PERF_SEL_INSTS_VALU_1_PASS__GFX11 = 0x000000b3, - SQ_PERF_SEL_INSTS_VALU_2_PASS__GFX11 = 0x000000b4, - SQ_PERF_SEL_INSTS_VALU_4_PASS__GFX11 = 0x000000b5, - SQ_PERF_SEL_INSTS_VALU_DP__GFX11 = 0x000000b6, - SQ_PERF_SEL_SP_CONST_CYCLES__GFX11 = 0x000000b7, - SQ_PERF_SEL_SP_CONST_STALL_CYCLES__GFX11 = 0x000000b8, - SQ_PERF_SEL_ITEMS_VALU__GFX11 = 0x000000b9, - SQ_PERF_SEL_ITEMS_MAX_VALU__GFX11 = 0x000000ba, - SQ_PERF_SEL_ITEM_CYCLES_VMEM__GFX11 = 0x000000bb, - SQ_PERF_SEL_INSTS_DELAY_ALU_COISSUE__GFX11 = 0x000000bc, - SQ_PERF_SEL_INSTS_FLAT_LOAD__GFX11 = 0x000000bd, - SQ_PERF_SEL_INSTS_FLAT_STORE__GFX11 = 0x000000be, - SQ_PERF_SEL_INSTS_VALU_ONE_CYCLE_WAVE64_16BIT__GFX11 = 0x000000bf, - SQ_PERF_SEL_INSTS_VALU_ONE_CYCLE_WAVE64_32BIT__GFX11 = 0x000000c0, - SQ_PERF_SEL_INSTS_NON_VALU_EXEC_SKIPPED__GFX11 = 0x000000c1, - SQ_PERF_SEL_INSTS_BARRIER__GFX11 = 0x000000c2, - SQ_PERF_SEL_INSTS_WAKEUP__GFX11 = 0x000000c3, - SQ_PERF_SEL_DUMMY_END__GFX11 = 0x000000c4, - SQC_PERF_SEL_LDS_BANK_CONFLICT__GFX11 = 0x00000100, - SQC_PERF_SEL_LDS_ADDR_CONFLICT__GFX11 = 0x00000101, - SQC_PERF_SEL_LDS_UNALIGNED_STALL__GFX11 = 0x00000102, - SQC_PERF_SEL_LDS_MEM_VIOLATIONS__GFX11 = 0x00000103, - 
SQC_PERF_SEL_LDS_ATOMIC_RETURN__GFX11 = 0x00000104, - SQC_PERF_SEL_LDS_IDX_ACTIVE__GFX11 = 0x00000105, - SQC_PERF_SEL_LDS_ADDR_STALL__GFX11 = 0x00000106, - SQC_PERF_SEL_LDS_ADDR_ACTIVE__GFX11 = 0x00000107, - SQC_PERF_SEL_LDS_PC_LDS_WRITE_STALL_TD__GFX11 = 0x00000108, - SQC_PERF_SEL_LDS_SPI_VGPR_WRITE_STALL_TD__GFX11 = 0x00000109, - SQC_PERF_SEL_LDS_LDS_VGPR_WRITE_STALL__GFX11 = 0x0000010a, - SQC_PERF_SEL_LDS_FP_ADD_CYCLES__GFX11 = 0x0000010b, - SQC_PERF_SEL_ICACHE_BUSY_CYCLES__GFX11 = 0x0000010c, - SQC_PERF_SEL_ICACHE_REQ__GFX11 = 0x0000010d, - SQC_PERF_SEL_ICACHE_HITS__GFX11 = 0x0000010e, - SQC_PERF_SEL_ICACHE_MISSES__GFX11 = 0x0000010f, - SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE__GFX11 = 0x00000110, - SQC_PERF_SEL_ICACHE_INVAL_INST__GFX11 = 0x00000111, - SQC_PERF_SEL_ICACHE_INVAL_ASYNC__GFX11 = 0x00000112, - SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL__GFX11 = 0x00000113, - SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL__GFX11 = 0x00000114, - SQC_PERF_SEL_TC_INFLIGHT_LEVEL__GFX11 = 0x00000115, - SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL__GFX11 = 0x00000116, - SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL__GFX11 = 0x00000117, - SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB__GFX11 = 0x00000118, - SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB__GFX11 = 0x00000119, - SQC_PERF_SEL_TC_REQ__GFX11 = 0x0000011a, - SQC_PERF_SEL_TC_INST_REQ__GFX11 = 0x0000011b, - SQC_PERF_SEL_TC_DATA_READ_REQ__GFX11 = 0x0000011c, - SQC_PERF_SEL_TC_STALL__GFX11 = 0x0000011d, - SQC_PERF_SEL_TC_STARVE__GFX11 = 0x0000011e, - SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT__GFX11 = 0x0000011f, - SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB__GFX11 = 0x00000120, - SQC_PERF_SEL_ICACHE_CACHE_STALLED__GFX11 = 0x00000121, - SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX__GFX11 = 0x00000122, - SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX11 = 0x00000123, - SQC_PERF_SEL_DCACHE_BUSY_CYCLES__GFX11 = 0x00000124, - SQC_PERF_SEL_DCACHE_REQ__GFX11 = 0x00000125, - SQC_PERF_SEL_DCACHE_HITS__GFX11 = 0x00000126, - SQC_PERF_SEL_DCACHE_MISSES__GFX11 = 0x00000127, - 
SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE__GFX11 = 0x00000128, - SQC_PERF_SEL_DCACHE_INVAL_INST__GFX11 = 0x00000129, - SQC_PERF_SEL_DCACHE_INVAL_ASYNC__GFX11 = 0x0000012a, - SQC_PERF_SEL_DCACHE_HIT_LRU_READ__GFX11 = 0x0000012b, - SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT__GFX11 = 0x0000012c, - SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB__GFX11 = 0x0000012d, - SQC_PERF_SEL_DCACHE_CACHE_STALLED__GFX11 = 0x0000012e, - SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX__GFX11 = 0x0000012f, - SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT__GFX11 = 0x00000130, - SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT__GFX11 = 0x00000131, - SQC_PERF_SEL_DCACHE_REQ_READ_1__GFX11 = 0x00000132, - SQC_PERF_SEL_DCACHE_REQ_READ_2__GFX11 = 0x00000133, - SQC_PERF_SEL_DCACHE_REQ_READ_4__GFX11 = 0x00000134, - SQC_PERF_SEL_DCACHE_REQ_READ_8__GFX11 = 0x00000135, - SQC_PERF_SEL_DCACHE_REQ_READ_16__GFX11 = 0x00000136, - SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE__GFX11 = 0x00000137, - SQC_PERF_SEL_SQ_DCACHE_REQS__GFX11 = 0x00000138, - SQC_PERF_SEL_DCACHE_FLAT_REQ__GFX11 = 0x00000139, - SQC_PERF_SEL_TD_VGPR_BUSY__GFX11 = 0x0000013a, - SQC_PERF_SEL_LDS_VGPR_BUSY__GFX11 = 0x0000013b, - SQC_PERF_SEL_LDS_TD_VGPR_CONF_STALL__GFX11 = 0x0000013c, - SQC_PERF_SEL_ICACHE_GCR__GFX11 = 0x0000013d, - SQC_PERF_SEL_ICACHE_GCR_HITS__GFX11 = 0x0000013e, - SQC_PERF_SEL_DCACHE_GCR__GFX11 = 0x0000013f, - SQC_PERF_SEL_DCACHE_GCR_HITS__GFX11 = 0x00000140, - SQC_PERF_SEL_ICACHE_GCR_INVALIDATE__GFX11 = 0x00000141, - SQC_PERF_SEL_DCACHE_GCR_INVALIDATE__GFX11 = 0x00000142, - SQC_PERF_SEL_DCACHE_SPI_RETURN_STALL__GFX11 = 0x00000143, - SQC_PERF_SEL_DUMMY_LAST__GFX11 = 0x00000144, - SP_PERF_SEL_DST_BUF_ALLOC_STALL__GFX11 = 0x000001c0, - SP_PERF_SEL_DST_BUF_WB_CONF_W_TD_LDS__GFX11 = 0x000001c1, - SP_PERF_SEL_DST_BUF_WB_CONF_W_SPI__GFX11 = 0x000001c2, - SP_PERF_SEL_DST_BUF_EVEN_DIRTY__GFX11 = 0x000001c3, - SP_PERF_SEL_DST_BUF_ODD_DIRTY__GFX11 = 0x000001c4, - SP_PERF_SEL_SRC_CACHE_HIT_B0__GFX11 = 0x000001c5, - SP_PERF_SEL_SRC_CACHE_HIT_B1__GFX11 = 
0x000001c6, - SP_PERF_SEL_SRC_CACHE_HIT_B2__GFX11 = 0x000001c7, - SP_PERF_SEL_SRC_CACHE_HIT_B3__GFX11 = 0x000001c8, - SP_PERF_SEL_SRC_CACHE_PROBE_B0__GFX11 = 0x000001c9, - SP_PERF_SEL_SRC_CACHE_PROBE_B1__GFX11 = 0x000001ca, - SP_PERF_SEL_SRC_CACHE_PROBE_B2__GFX11 = 0x000001cb, - SP_PERF_SEL_SRC_CACHE_PROBE_B3__GFX11 = 0x000001cc, - SP_PERF_SEL_SRC_CACHE_VGPR_RD_B0__GFX11 = 0x000001cd, - SP_PERF_SEL_SRC_CACHE_VGPR_RD_B1__GFX11 = 0x000001ce, - SP_PERF_SEL_SRC_CACHE_VGPR_RD_B2__GFX11 = 0x000001cf, - SP_PERF_SEL_SRC_CACHE_VGPR_RD_B3__GFX11 = 0x000001d0, - SP_PERF_SEL_SRC_CACHE_RECYCLE_HIT_B0__GFX11 = 0x000001d1, - SP_PERF_SEL_SRC_CACHE_RECYCLE_HIT_B1__GFX11 = 0x000001d2, - SP_PERF_SEL_SRC_CACHE_RECYCLE_HIT_B2__GFX11 = 0x000001d3, - SP_PERF_SEL_SRC_CACHE_RECYCLE_HIT_B3__GFX11 = 0x000001d4, - SP_PERF_SEL_SRC_CACHE_PROBE_SUCCESS_B0__GFX11 = 0x000001d5, - SP_PERF_SEL_SRC_CACHE_PROBE_SUCCESS_B1__GFX11 = 0x000001d6, - SP_PERF_SEL_SRC_CACHE_PROBE_SUCCESS_B2__GFX11 = 0x000001d7, - SP_PERF_SEL_SRC_CACHE_PROBE_SUCCESS_B3__GFX11 = 0x000001d8, - SP_PERF_SEL_VALU_PENDING_QUEUE_STALL__GFX11 = 0x000001d9, - SP_PERF_SEL_VALU_OPERAND__GFX11 = 0x000001da, - SP_PERF_SEL_VALU_VGPR_OPERAND__GFX11 = 0x000001db, - SP_PERF_SEL_VALU_OPERAND_FROM_DST_BUF__GFX11 = 0x000001dc, - SP_PERF_SEL_VALU_EXEC_MASK_CHANGE__GFX11 = 0x000001dd, - SP_PERF_SEL_VALU_COEXEC_WITH_TRANS__GFX11 = 0x000001de, - SP_PERF_SEL_VALU_SGPR_FWD_BUF_FULL__GFX11 = 0x000001df, - SP_PERF_SEL_VALU_STALL__GFX11 = 0x000001e0, - SP_PERF_SEL_VALU_STALL_VGPR_NOT_READY__GFX11 = 0x000001e1, - SP_PERF_SEL_VALU_STALL_SGPR_NOT_READY__GFX11 = 0x000001e2, - SP_PERF_SEL_VALU_STALL_VDST_FWD__GFX11 = 0x000001e3, - SP_PERF_SEL_VALU_STALL_SDST_FWD__GFX11 = 0x000001e4, - SP_PERF_SEL_VALU_STALL_DST_STALL__GFX11 = 0x000001e5, - SP_PERF_SEL_VALU_FAST_OP_STALL_VGPR_NOT_READY__GFX11 = 0x000001e6, - SP_PERF_SEL_VGPR_VMEM_RD__GFX11 = 0x000001e7, - SP_PERF_SEL_VGPR_EXP_RD__GFX11 = 0x000001e8, - SP_PERF_SEL_VGPR_SPI_WR__GFX11 = 0x000001e9, - 
SP_PERF_SEL_VGPR_TDLDS_DATA_WR__GFX11 = 0x000001ea, - SP_PERF_SEL_VGPR_WR__GFX11 = 0x000001eb, - SP_PERF_SEL_VGPR_RD__GFX11 = 0x000001ec, - SP_PERF_SEL_DUMMY_LAST__GFX11 = 0x000001ed, -#endif -} SQ_PERF_SEL; - -constexpr unsigned int MaxSqPerfSelGfx09 = SQC_PERF_SEL_DUMMY_LAST__GFX09; -constexpr unsigned int MaxSqPerfSelGfx10Core = SP_PERF_SEL_DUMMY_LAST__GFX10CORE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxSqPerfSelGfx104Plus = SQ_PERF_SEL_NONE2__GFX104PLUS; -#endif - -typedef enum SQ_RSRC_BUF_TYPE { - SQ_RSRC_BUF = 0x00000000, - SQ_RSRC_BUF_RSVD_1 = 0x00000001, - SQ_RSRC_BUF_RSVD_2 = 0x00000002, - SQ_RSRC_BUF_RSVD_3 = 0x00000003, -} SQ_RSRC_BUF_TYPE; - -typedef enum SQ_RSRC_FLAT_TYPE { - SQ_RSRC_FLAT_RSVD_0 = 0x00000000, - SQ_RSRC_FLAT = 0x00000001, - SQ_RSRC_FLAT_RSVD_2 = 0x00000002, - SQ_RSRC_FLAT_RSVD_3 = 0x00000003, -} SQ_RSRC_FLAT_TYPE; - -typedef enum SQ_RSRC_IMG_TYPE { - SQ_RSRC_IMG_RSVD_0 = 0x00000000, - SQ_RSRC_IMG_RSVD_1 = 0x00000001, - SQ_RSRC_IMG_RSVD_2 = 0x00000002, - SQ_RSRC_IMG_RSVD_3 = 0x00000003, - SQ_RSRC_IMG_RSVD_4 = 0x00000004, - SQ_RSRC_IMG_RSVD_5 = 0x00000005, - SQ_RSRC_IMG_RSVD_6 = 0x00000006, - SQ_RSRC_IMG_RSVD_7 = 0x00000007, - SQ_RSRC_IMG_1D = 0x00000008, - SQ_RSRC_IMG_2D = 0x00000009, - SQ_RSRC_IMG_3D = 0x0000000a, - SQ_RSRC_IMG_CUBE = 0x0000000b, - SQ_RSRC_IMG_1D_ARRAY = 0x0000000c, - SQ_RSRC_IMG_2D_ARRAY = 0x0000000d, - SQ_RSRC_IMG_2D_MSAA = 0x0000000e, - SQ_RSRC_IMG_2D_MSAA_ARRAY = 0x0000000f, -} SQ_RSRC_IMG_TYPE; - -typedef enum SQ_SEL_XYZW01 { - SQ_SEL_0 = 0x00000000, - SQ_SEL_1 = 0x00000001, - SQ_SEL_N_BC_1 = 0x00000002, - SQ_SEL_RESERVED_1 = 0x00000003, - SQ_SEL_X = 0x00000004, - SQ_SEL_Y = 0x00000005, - SQ_SEL_Z = 0x00000006, - SQ_SEL_W = 0x00000007, -} SQ_SEL_XYZW01; - -typedef enum SQ_TEX_ANISO_RATIO { - SQ_TEX_ANISO_RATIO_1 = 0x00000000, - SQ_TEX_ANISO_RATIO_2 = 0x00000001, - SQ_TEX_ANISO_RATIO_4 = 0x00000002, - SQ_TEX_ANISO_RATIO_8 = 0x00000003, - 
SQ_TEX_ANISO_RATIO_16 = 0x00000004, -} SQ_TEX_ANISO_RATIO; - -typedef enum SQ_TEX_BORDER_COLOR { - SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00000000, - SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x00000001, - SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x00000002, - SQ_TEX_BORDER_COLOR_REGISTER = 0x00000003, -} SQ_TEX_BORDER_COLOR; - -typedef enum SQ_TEX_CLAMP { - SQ_TEX_WRAP = 0x00000000, - SQ_TEX_MIRROR = 0x00000001, - SQ_TEX_CLAMP_LAST_TEXEL = 0x00000002, - SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x00000003, - SQ_TEX_CLAMP_HALF_BORDER = 0x00000004, - SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x00000005, - SQ_TEX_CLAMP_BORDER = 0x00000006, - SQ_TEX_MIRROR_ONCE_BORDER = 0x00000007, -} SQ_TEX_CLAMP; - -typedef enum SQ_TEX_DEPTH_COMPARE { - SQ_TEX_DEPTH_COMPARE_NEVER = 0x00000000, - SQ_TEX_DEPTH_COMPARE_LESS = 0x00000001, - SQ_TEX_DEPTH_COMPARE_EQUAL = 0x00000002, - SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x00000003, - SQ_TEX_DEPTH_COMPARE_GREATER = 0x00000004, - SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x00000005, - SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x00000006, - SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x00000007, -} SQ_TEX_DEPTH_COMPARE; - -typedef enum SQ_TEX_MIP_FILTER { - SQ_TEX_MIP_FILTER_NONE = 0x00000000, - SQ_TEX_MIP_FILTER_POINT = 0x00000001, - SQ_TEX_MIP_FILTER_LINEAR = 0x00000002, - SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ = 0x00000003, -} SQ_TEX_MIP_FILTER; - -typedef enum SQ_TEX_XY_FILTER { - SQ_TEX_XY_FILTER_POINT = 0x00000000, - SQ_TEX_XY_FILTER_BILINEAR = 0x00000001, - SQ_TEX_XY_FILTER_ANISO_POINT = 0x00000002, - SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x00000003, -} SQ_TEX_XY_FILTER; - -typedef enum SQ_TEX_Z_FILTER { - SQ_TEX_Z_FILTER_NONE = 0x00000000, - SQ_TEX_Z_FILTER_POINT = 0x00000001, - SQ_TEX_Z_FILTER_LINEAR = 0x00000002, -} SQ_TEX_Z_FILTER; - -typedef enum SQ_THREAD_TRACE_CAPTURE_MODE { - SQ_THREAD_TRACE_CAPTURE_MODE_ALL = 0x00000000, - SQ_THREAD_TRACE_CAPTURE_MODE_SELECT = 0x00000001, - SQ_THREAD_TRACE_CAPTURE_MODE_SELECT_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_CAPTURE_MODE; - -typedef enum 
SQ_THREAD_TRACE_INST_TYPE { - SQ_THREAD_TRACE_INST_TYPE_SMEM_RD = 0x00000000, - SQ_THREAD_TRACE_INST_TYPE_SALU_32 = 0x00000001, - SQ_THREAD_TRACE_INST_TYPE_VMEM_RD = 0x00000002, - SQ_THREAD_TRACE_INST_TYPE_VMEM_WR = 0x00000003, - SQ_THREAD_TRACE_INST_TYPE_FLAT_WR = 0x00000004, - SQ_THREAD_TRACE_INST_TYPE_VALU_32 = 0x00000005, - SQ_THREAD_TRACE_INST_TYPE_LDS = 0x00000006, - SQ_THREAD_TRACE_INST_TYPE_PC = 0x00000007, - SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GDS = 0x00000008, - SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GFX = 0x00000009, - SQ_THREAD_TRACE_INST_TYPE_EXPGNT_PAR_COL = 0x0000000a, - SQ_THREAD_TRACE_INST_TYPE_EXPGNT_POS_GDS = 0x0000000b, - SQ_THREAD_TRACE_INST_TYPE_JUMP = 0x0000000c, - SQ_THREAD_TRACE_INST_TYPE_NEXT = 0x0000000d, - SQ_THREAD_TRACE_INST_TYPE_FLAT_RD = 0x0000000e, - SQ_THREAD_TRACE_INST_TYPE_OTHER_MSG = 0x0000000f, - SQ_THREAD_TRACE_INST_TYPE_SMEM_WR = 0x00000010, - SQ_THREAD_TRACE_INST_TYPE_SALU_64 = 0x00000011, - SQ_THREAD_TRACE_INST_TYPE_VALU_64 = 0x00000012, - SQ_THREAD_TRACE_INST_TYPE_SMEM_RD_REPLAY = 0x00000013, - SQ_THREAD_TRACE_INST_TYPE_SMEM_WR_REPLAY = 0x00000014, - SQ_THREAD_TRACE_INST_TYPE_VMEM_RD_REPLAY = 0x00000015, - SQ_THREAD_TRACE_INST_TYPE_VMEM_WR_REPLAY = 0x00000016, - SQ_THREAD_TRACE_INST_TYPE_FLAT_RD_REPLAY = 0x00000017, - SQ_THREAD_TRACE_INST_TYPE_FLAT_WR_REPLAY = 0x00000018, - SQ_THREAD_TRACE_INST_TYPE_FATAL_HALT = 0x00000019, - SQ_THREAD_TRACE_INST_TYPE_DIDT_STALL_START = 0x0000001a, - SQ_THREAD_TRACE_INST_TYPE_DIDT_STALL_END = 0x0000001b, -} SQ_THREAD_TRACE_INST_TYPE; - -typedef enum SQ_THREAD_TRACE_ISSUE { - SQ_THREAD_TRACE_ISSUE_NULL = 0x00000000, - SQ_THREAD_TRACE_ISSUE_STALL = 0x00000001, - SQ_THREAD_TRACE_ISSUE_INST = 0x00000002, - SQ_THREAD_TRACE_ISSUE_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE; - -typedef enum SQ_THREAD_TRACE_ISSUE_MASK { - SQ_THREAD_TRACE_ISSUE_MASK_ALL = 0x00000000, - SQ_THREAD_TRACE_ISSUE_MASK_STALLED = 0x00000001, - SQ_THREAD_TRACE_ISSUE_MASK_STALLED_AND_IMMED = 0x00000002, - 
SQ_THREAD_TRACE_ISSUE_MASK_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE_MASK; - -typedef enum SQ_THREAD_TRACE_MISC_TOKEN_TYPE { - SQ_THREAD_TRACE_MISC_TOKEN_TIME = 0x00000000, - SQ_THREAD_TRACE_MISC_TOKEN_TIME_RESET = 0x00000001, - SQ_THREAD_TRACE_MISC_TOKEN_PACKET_LOST = 0x00000002, - SQ_THREAD_TRACE_MISC_TOKEN_SURF_SYNC = 0x00000003, - SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_BEGIN = 0x00000004, - SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_END = 0x00000005, - SQ_THREAD_TRACE_MISC_TOKEN_SAVECTX = 0x00000006, - SQ_THREAD_TRACE_MISC_TOKEN_SHOOT_DOWN = 0x00000007, -} SQ_THREAD_TRACE_MISC_TOKEN_TYPE; - -typedef enum SQ_THREAD_TRACE_MODE_SEL { - SQ_THREAD_TRACE_MODE_OFF = 0x00000000, - SQ_THREAD_TRACE_MODE_ON = 0x00000001, -} SQ_THREAD_TRACE_MODE_SEL; - -typedef enum SQ_THREAD_TRACE_REG_OP { - SQ_THREAD_TRACE_REG_OP_READ = 0x00000000, - SQ_THREAD_TRACE_REG_OP_WRITE = 0x00000001, -} SQ_THREAD_TRACE_REG_OP; - -typedef enum SQ_THREAD_TRACE_REG_TYPE { - SQ_THREAD_TRACE_REG_TYPE_EVENT = 0x00000000, - SQ_THREAD_TRACE_REG_TYPE_DRAW = 0x00000001, - SQ_THREAD_TRACE_REG_TYPE_DISPATCH = 0x00000002, - SQ_THREAD_TRACE_REG_TYPE_USERDATA = 0x00000003, - SQ_THREAD_TRACE_REG_TYPE_MARKER = 0x00000004, - SQ_THREAD_TRACE_REG_TYPE_GFXDEC = 0x00000005, - SQ_THREAD_TRACE_REG_TYPE_SHDEC = 0x00000006, - SQ_THREAD_TRACE_REG_TYPE_OTHER = 0x00000007, -} SQ_THREAD_TRACE_REG_TYPE; - -typedef enum SQ_THREAD_TRACE_TOKEN_TYPE { - SQ_THREAD_TRACE_TOKEN_MISC = 0x00000000, - SQ_THREAD_TRACE_TOKEN_TIMESTAMP = 0x00000001, - SQ_THREAD_TRACE_TOKEN_REG = 0x00000002, - SQ_THREAD_TRACE_TOKEN_WAVE_START = 0x00000003, - SQ_THREAD_TRACE_TOKEN_WAVE_ALLOC = 0x00000004, - SQ_THREAD_TRACE_TOKEN_REG_CSPRIV = 0x00000005, - SQ_THREAD_TRACE_TOKEN_WAVE_END = 0x00000006, - SQ_THREAD_TRACE_TOKEN_EVENT = 0x00000007, - SQ_THREAD_TRACE_TOKEN_EVENT_CS = 0x00000008, - SQ_THREAD_TRACE_TOKEN_EVENT_GFX1 = 0x00000009, - SQ_THREAD_TRACE_TOKEN_INST = 0x0000000a, - SQ_THREAD_TRACE_TOKEN_INST_PC = 0x0000000b, - 
SQ_THREAD_TRACE_TOKEN_INST_USERDATA = 0x0000000c, - SQ_THREAD_TRACE_TOKEN_ISSUE = 0x0000000d, - SQ_THREAD_TRACE_TOKEN_PERF = 0x0000000e, - SQ_THREAD_TRACE_TOKEN_REG_CS = 0x0000000f, -} SQ_THREAD_TRACE_TOKEN_TYPE; - -typedef enum SQ_THREAD_TRACE_VM_ID_MASK { - SQ_THREAD_TRACE_VM_ID_MASK_SINGLE = 0x00000000, - SQ_THREAD_TRACE_VM_ID_MASK_ALL = 0x00000001, - SQ_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_VM_ID_MASK; - -typedef enum SQ_THREAD_TRACE_WAVE_MASK { - SQ_THREAD_TRACE_WAVE_MASK_NONE = 0x00000000, - SQ_THREAD_TRACE_WAVE_MASK_ALL = 0x00000001, -} SQ_THREAD_TRACE_WAVE_MASK; - -typedef enum SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX { - SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_WREXEC = 0x00000018, - SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_RESTORE = 0x00000019, -} SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX; - -typedef enum SQ_TT_MODE { - SQ_TT_MODE_OFF = 0x00000000, - SQ_TT_MODE_ON = 0x00000001, - SQ_TT_MODE_GLOBAL = 0x00000002, - SQ_TT_MODE_DETAIL = 0x00000003, -} SQ_TT_MODE; - -typedef enum SQ_TT_RT_FREQ { - SQ_TT_RT_FREQ_NEVER = 0x00000000, - SQ_TT_RT_FREQ_1024_CLK = 0x00000001, - SQ_TT_RT_FREQ_4096_CLK = 0x00000002, -} SQ_TT_RT_FREQ; - -typedef enum SQ_TT_TOKEN_MASK_INST_EXCLUDE { - SQ_TT_INST_EXCLUDE_VMEM_OTHER_SIMD_BIT = 0x00000001, - SQ_TT_INST_EXCLUDE_EXPGNT234_BIT = 0x00000002, -} SQ_TT_TOKEN_MASK_INST_EXCLUDE; - -typedef enum SQ_TT_TOKEN_MASK_INST_EXCLUDE_SHIFT { - SQ_TT_INST_EXCLUDE_VMEM_OTHER_SIMD_SHIFT = 0x00000000, - SQ_TT_INST_EXCLUDE_EXPGNT234_SHIFT = 0x00000001, -} SQ_TT_TOKEN_MASK_INST_EXCLUDE_SHIFT; - -typedef enum SQ_TT_TOKEN_MASK_REG_EXCLUDE { - SQ_TT_REG_EXCLUDE_USER_DATA_BIT = 0x00000001, - SQ_TT_REG_EXCLUDE_CP_ME_MC_RADDR_BIT = 0x00000002, - SQ_TT_REG_EXCLUDE_GRBM_COMPUTE_EXCLUDE_BIT__GFX103PLUSEXCLUSIVE = 0x00000004, -} SQ_TT_TOKEN_MASK_REG_EXCLUDE; - -typedef enum SQ_TT_TOKEN_MASK_REG_EXCLUDE_SHIFT { - SQ_TT_REG_EXCLUDE_USER_DATA_SHIFT = 0x00000000, - SQ_TT_REG_EXCLUDE_CP_ME_MC_RADDR_SHIFT = 0x00000001, - 
SQ_TT_REG_EXCLUDE_GRBM_COMPUTE_EXCLUDE_SHIFT__GFX103PLUSEXCLUSIVE = 0x00000002, -} SQ_TT_TOKEN_MASK_REG_EXCLUDE_SHIFT; - -typedef enum SQ_TT_TOKEN_MASK_REG_INCLUDE { - SQ_TT_TOKEN_MASK_SQDEC_BIT = 0x00000001, - SQ_TT_TOKEN_MASK_SHDEC_BIT = 0x00000002, - SQ_TT_TOKEN_MASK_GFXUDEC_BIT = 0x00000004, - SQ_TT_TOKEN_MASK_COMP_BIT = 0x00000008, - SQ_TT_TOKEN_MASK_CONTEXT_BIT = 0x00000010, - SQ_TT_TOKEN_MASK_CONFIG_BIT = 0x00000020, - SQ_TT_TOKEN_MASK_READS_BIT__GFX10 = 0x00000080, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_TOKEN_MASK_ALL_BIT__GFX104PLUS = 0x00000040, -#endif - SQ_TT_TOKEN_MASK_OTHER_BIT__GFX10CORE = 0x00000040, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_TOKEN_MASK_RSVD_BIT__GFX11 = 0x00000080, -#endif -} SQ_TT_TOKEN_MASK_REG_INCLUDE; - -typedef enum SQ_TT_TOKEN_MASK_REG_INCLUDE_SHIFT { - SQ_TT_TOKEN_MASK_SQDEC_SHIFT = 0x00000000, - SQ_TT_TOKEN_MASK_SHDEC_SHIFT = 0x00000001, - SQ_TT_TOKEN_MASK_GFXUDEC_SHIFT = 0x00000002, - SQ_TT_TOKEN_MASK_COMP_SHIFT = 0x00000003, - SQ_TT_TOKEN_MASK_CONTEXT_SHIFT = 0x00000004, - SQ_TT_TOKEN_MASK_CONFIG_SHIFT = 0x00000005, - SQ_TT_TOKEN_MASK_READS_SHIFT__GFX10 = 0x00000007, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_TOKEN_MASK_ALL_SHIFT__GFX104PLUS = 0x00000006, -#endif - SQ_TT_TOKEN_MASK_OTHER_SHIFT__GFX10CORE = 0x00000006, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_TOKEN_MASK_RSVD_SHIFT__GFX11 = 0x00000007, -#endif -} SQ_TT_TOKEN_MASK_REG_INCLUDE_SHIFT; - -typedef enum SQ_TT_TOKEN_MASK_TOKEN_EXCLUDE_SHIFT { - SQ_TT_TOKEN_EXCLUDE_VMEMEXEC_SHIFT = 0x00000000, - SQ_TT_TOKEN_EXCLUDE_ALUEXEC_SHIFT = 0x00000001, - SQ_TT_TOKEN_EXCLUDE_VALUINST_SHIFT = 0x00000002, - SQ_TT_TOKEN_EXCLUDE_WAVERDY_SHIFT = 0x00000003, - SQ_TT_TOKEN_EXCLUDE_IMMEDIATE_SHIFT = 0x00000005, - SQ_TT_TOKEN_EXCLUDE_REG_SHIFT = 0x00000006, - SQ_TT_TOKEN_EXCLUDE_EVENT_SHIFT = 0x00000007, - 
SQ_TT_TOKEN_EXCLUDE_INST_SHIFT = 0x00000008, - SQ_TT_TOKEN_EXCLUDE_UTILCTR_SHIFT = 0x00000009, - SQ_TT_TOKEN_EXCLUDE_WAVEALLOC_SHIFT = 0x0000000a, - SQ_TT_TOKEN_EXCLUDE_IMMED1_SHIFT__GFX101 = 0x00000004, - SQ_TT_TOKEN_EXCLUDE_WAVESTARTEND_SHIFT__GFX103PLUSEXCLUSIVE = 0x00000004, - SQ_TT_TOKEN_EXCLUDE_PERF_SHIFT__GFX10COREPLUS = 0x0000000b, -} SQ_TT_TOKEN_MASK_TOKEN_EXCLUDE_SHIFT; - -typedef enum SQ_TT_UTIL_TIMER { - SQ_TT_UTIL_TIMER_100_CLK = 0x00000000, - SQ_TT_UTIL_TIMER_250_CLK = 0x00000001, -} SQ_TT_UTIL_TIMER; - -typedef enum SQ_TT_WAVESTART_MODE { - SQ_TT_WAVESTART_MODE_SHORT = 0x00000000, - SQ_TT_WAVESTART_MODE_ALLOC = 0x00000001, - SQ_TT_WAVESTART_MODE_PBB_ID = 0x00000002, -} SQ_TT_WAVESTART_MODE; - -typedef enum SQ_TT_WTYPE_INCLUDE { - SQ_TT_WTYPE_INCLUDE_PS_BIT = 0x00000001, - SQ_TT_WTYPE_INCLUDE_GS_BIT = 0x00000004, - SQ_TT_WTYPE_INCLUDE_HS_BIT = 0x00000010, - SQ_TT_WTYPE_INCLUDE_CS_BIT = 0x00000040, - SQ_TT_WTYPE_INCLUDE_VS_BIT__GFX10 = 0x00000002, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_WTYPE_INCLUDE_RSVD1_BIT__GFX104PLUS = 0x00000008, - SQ_TT_WTYPE_INCLUDE_RSVD2_BIT__GFX104PLUS = 0x00000020, -#endif - SQ_TT_WTYPE_INCLUDE_ES_BIT__GFX10CORE = 0x00000008, - SQ_TT_WTYPE_INCLUDE_LS_BIT__GFX10CORE = 0x00000020, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_WTYPE_INCLUDE_RSVD0_BIT__GFX11 = 0x00000002, -#endif -} SQ_TT_WTYPE_INCLUDE; - -typedef enum SQ_TT_WTYPE_INCLUDE_SHIFT { - SQ_TT_WTYPE_INCLUDE_PS_SHIFT = 0x00000000, - SQ_TT_WTYPE_INCLUDE_GS_SHIFT = 0x00000002, - SQ_TT_WTYPE_INCLUDE_HS_SHIFT = 0x00000004, - SQ_TT_WTYPE_INCLUDE_CS_SHIFT = 0x00000006, - SQ_TT_WTYPE_INCLUDE_VS_SHIFT__GFX10 = 0x00000001, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_WTYPE_INCLUDE_RSVD1_SHIFT__GFX104PLUS = 0x00000003, - SQ_TT_WTYPE_INCLUDE_RSVD2_SHIFT__GFX104PLUS = 0x00000005, -#endif - SQ_TT_WTYPE_INCLUDE_ES_SHIFT__GFX10CORE = 0x00000003, - 
SQ_TT_WTYPE_INCLUDE_LS_SHIFT__GFX10CORE = 0x00000005, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SQ_TT_WTYPE_INCLUDE_RSVD0_SHIFT__GFX11 = 0x00000001, -#endif -} SQ_TT_WTYPE_INCLUDE_SHIFT; - -typedef enum StencilFormat { - STENCIL_INVALID = 0x00000000, - STENCIL_8 = 0x00000001, -} StencilFormat; - -typedef enum StencilOp { - STENCIL_KEEP = 0x00000000, - STENCIL_ZERO = 0x00000001, - STENCIL_ONES = 0x00000002, - STENCIL_REPLACE_TEST = 0x00000003, - STENCIL_REPLACE_OP = 0x00000004, - STENCIL_ADD_CLAMP = 0x00000005, - STENCIL_SUB_CLAMP = 0x00000006, - STENCIL_INVERT = 0x00000007, - STENCIL_ADD_WRAP = 0x00000008, - STENCIL_SUB_WRAP = 0x00000009, - STENCIL_AND = 0x0000000a, - STENCIL_OR = 0x0000000b, - STENCIL_XOR = 0x0000000c, - STENCIL_NAND = 0x0000000d, - STENCIL_NOR = 0x0000000e, - STENCIL_XNOR = 0x0000000f, -} StencilOp; - -typedef enum SurfaceArray { - ARRAY_1D = 0x00000000, - ARRAY_2D = 0x00000001, - ARRAY_3D = 0x00000002, - ARRAY_3D_SLICE = 0x00000003, -} SurfaceArray; - -typedef enum SurfaceEndian { - ENDIAN_NONE = 0x00000000, - ENDIAN_8IN16 = 0x00000001, - ENDIAN_8IN32 = 0x00000002, - ENDIAN_8IN64 = 0x00000003, -} SurfaceEndian; - -typedef enum SurfaceFormat { - FMT_INVALID = 0x00000000, - FMT_8 = 0x00000001, - FMT_16 = 0x00000002, - FMT_8_8 = 0x00000003, - FMT_32 = 0x00000004, - FMT_16_16 = 0x00000005, - FMT_10_11_11 = 0x00000006, - FMT_11_11_10 = 0x00000007, - FMT_10_10_10_2 = 0x00000008, - FMT_2_10_10_10 = 0x00000009, - FMT_8_8_8_8 = 0x0000000a, - FMT_32_32 = 0x0000000b, - FMT_16_16_16_16 = 0x0000000c, - FMT_32_32_32 = 0x0000000d, - FMT_32_32_32_32 = 0x0000000e, - FMT_RESERVED_4 = 0x0000000f, - FMT_5_6_5 = 0x00000010, - FMT_1_5_5_5 = 0x00000011, - FMT_5_5_5_1 = 0x00000012, - FMT_4_4_4_4 = 0x00000013, - FMT_8_24 = 0x00000014, - FMT_24_8 = 0x00000015, - FMT_X24_8_32_FLOAT = 0x00000016, - FMT_RESERVED_33 = 0x00000017, - FMT_11_11_10_FLOAT = 0x00000018, - FMT_16_FLOAT = 0x00000019, - FMT_32_FLOAT = 0x0000001a, - FMT_16_16_FLOAT 
= 0x0000001b, - FMT_8_24_FLOAT = 0x0000001c, - FMT_24_8_FLOAT = 0x0000001d, - FMT_32_32_FLOAT = 0x0000001e, - FMT_10_11_11_FLOAT = 0x0000001f, - FMT_16_16_16_16_FLOAT = 0x00000020, - FMT_3_3_2 = 0x00000021, - FMT_6_5_5 = 0x00000022, - FMT_32_32_32_32_FLOAT = 0x00000023, - FMT_RESERVED_36 = 0x00000024, - FMT_1 = 0x00000025, - FMT_1_REVERSED = 0x00000026, - FMT_GB_GR = 0x00000027, - FMT_BG_RG = 0x00000028, - FMT_32_AS_8 = 0x00000029, - FMT_32_AS_8_8 = 0x0000002a, - FMT_5_9_9_9_SHAREDEXP = 0x0000002b, - FMT_8_8_8 = 0x0000002c, - FMT_16_16_16 = 0x0000002d, - FMT_16_16_16_FLOAT = 0x0000002e, - FMT_4_4 = 0x0000002f, - FMT_32_32_32_FLOAT = 0x00000030, - FMT_BC1 = 0x00000031, - FMT_BC2 = 0x00000032, - FMT_BC3 = 0x00000033, - FMT_BC4 = 0x00000034, - FMT_BC5 = 0x00000035, - FMT_BC6 = 0x00000036, - FMT_BC7 = 0x00000037, - FMT_32_AS_32_32_32_32 = 0x00000038, - FMT_APC3 = 0x00000039, - FMT_APC4 = 0x0000003a, - FMT_APC5 = 0x0000003b, - FMT_APC6 = 0x0000003c, - FMT_APC7 = 0x0000003d, - FMT_CTX1__CORE = 0x0000003e, - FMT_RESERVED_63__CORE = 0x0000003f, -} SurfaceFormat; - -typedef enum SurfaceNumber { - NUMBER_UNORM = 0x00000000, - NUMBER_SNORM = 0x00000001, - NUMBER_USCALED = 0x00000002, - NUMBER_SSCALED = 0x00000003, - NUMBER_UINT = 0x00000004, - NUMBER_SINT = 0x00000005, - NUMBER_SRGB = 0x00000006, - NUMBER_FLOAT = 0x00000007, -} SurfaceNumber; - -typedef enum SurfaceSwap { - SWAP_STD = 0x00000000, - SWAP_ALT = 0x00000001, - SWAP_STD_REV = 0x00000002, - SWAP_ALT_REV = 0x00000003, -} SurfaceSwap; - -typedef enum SurfaceTiling { - ARRAY_LINEAR = 0x00000000, - ARRAY_TILED = 0x00000001, -} SurfaceTiling; - -typedef enum SU_PERFCNT_SEL { - PERF_PAPC_PASX_REQ = 0x00000000, - PERF_PAPC_PASX_VTX_KILL_DISCARD = 0x00000006, - PERF_PAPC_PASX_VTX_NAN_DISCARD = 0x00000007, - PERF_PAPC_CLPR_CULL_PRIM = 0x0000000e, - PERF_PAPC_CLPR_VVUCP_CULL_PRIM = 0x0000000f, - PERF_PAPC_CLPR_VV_CULL_PRIM = 0x00000010, - PERF_PAPC_CLPR_UCP_CULL_PRIM = 0x00000011, - PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 
0x00000012, - PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 0x00000013, - PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 0x00000014, - PERF_PAPC_CLPR_VVUCP_CLIP_PRIM = 0x00000015, - PERF_PAPC_CLPR_VV_CLIP_PRIM = 0x00000016, - PERF_PAPC_CLPR_UCP_CLIP_PRIM = 0x00000017, - PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 0x00000018, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 0x00000019, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 0x0000001a, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 0x0000001b, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 0x0000001c, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_5_8 = 0x0000001d, - PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 0x0000001f, - PERF_PAPC_CLPR_CLIP_PLANE_FAR = 0x00000020, - PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 0x00000021, - PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 0x00000022, - PERF_PAPC_CLPR_CLIP_PLANE_TOP = 0x00000023, - PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 0x00000024, - PERF_PAPC_CLPR_RASTER_KILL_CULL_PRIM = 0x00000026, - PERF_PAPC_CLSM_NULL_PRIM = 0x00000027, - PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 0x00000028, - PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 0x00000029, - PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 0x0000002a, - PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 0x0000002b, - PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 0x0000002c, - PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 0x0000002d, - PERF_PAPC_CLSM_OUT_PRIM_CNT_5_8 = 0x0000002e, - PERF_PAPC_CLIPGA_VTE_KILL_PRIM = 0x00000030, - PERF_PAPC_SU_INPUT_PRIM = 0x00000031, - PERF_PAPC_SU_INPUT_CLIP_PRIM = 0x00000032, - PERF_PAPC_SU_INPUT_NULL_PRIM = 0x00000033, - PERF_PAPC_SU_INPUT_PRIM_DUAL = 0x00000034, - PERF_PAPC_SU_INPUT_CLIP_PRIM_DUAL = 0x00000035, - PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 0x00000036, - PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 0x00000037, - PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 0x00000038, - PERF_PAPC_SU_POLYMODE_FACE_CULL = 0x00000039, - PERF_PAPC_SU_POLYMODE_BACK_CULL = 0x0000003a, - PERF_PAPC_SU_POLYMODE_FRONT_CULL = 0x0000003b, - PERF_PAPC_SU_POLYMODE_INVALID_FILL = 0x0000003c, - PERF_PAPC_SU_OUTPUT_PRIM = 0x0000003d, - PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 0x0000003e, - PERF_PAPC_SU_OUTPUT_NULL_PRIM = 0x0000003f, 
- PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 0x00000040, - PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 0x00000041, - PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 0x00000042, - PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 0x00000043, - PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 0x00000044, - PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 0x00000045, - PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 0x00000046, - PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 0x00000047, - PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 0x00000048, - PERF_PAPC_SU_OUTPUT_PRIM_DUAL = 0x00000049, - PERF_PAPC_SU_OUTPUT_CLIP_PRIM_DUAL = 0x0000004a, - PERF_PAPC_SU_OUTPUT_POLYMODE_DUAL = 0x0000004b, - PERF_PAPC_SU_OUTPUT_CLIP_POLYMODE_DUAL = 0x0000004c, - PERF_PAPC_PASX_REQ_IDLE = 0x0000004d, - PERF_PAPC_PASX_REQ_BUSY = 0x0000004e, - PERF_PAPC_PASX_REQ_STALLED = 0x0000004f, - PERF_PAPC_PASX_REC_IDLE = 0x00000050, - PERF_PAPC_PASX_REC_BUSY = 0x00000051, - PERF_PAPC_PASX_REC_STARVED_SX = 0x00000052, - PERF_PAPC_PASX_REC_STALLED = 0x00000053, - PERF_PAPC_PASX_REC_STALLED_POS_MEM = 0x00000054, - PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 0x00000055, - PERF_PAPC_CCGSM_IDLE = 0x00000056, - PERF_PAPC_CCGSM_BUSY = 0x00000057, - PERF_PAPC_CCGSM_STALLED = 0x00000058, - PERF_PAPC_CLPRIM_IDLE = 0x00000059, - PERF_PAPC_CLPRIM_BUSY = 0x0000005a, - PERF_PAPC_CLPRIM_STALLED = 0x0000005b, - PERF_PAPC_CLPRIM_STARVED_CCGSM = 0x0000005c, - PERF_PAPC_CLIPSM_IDLE = 0x0000005d, - PERF_PAPC_CLIPSM_BUSY = 0x0000005e, - PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 0x0000005f, - PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 0x00000060, - PERF_PAPC_CLIPSM_WAIT_CLIPGA = 0x00000061, - PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 0x00000062, - PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 0x00000063, - PERF_PAPC_CLIPGA_IDLE = 0x00000064, - PERF_PAPC_CLIPGA_BUSY = 0x00000065, - PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 0x00000066, - PERF_PAPC_CLIPGA_STALLED = 0x00000067, - PERF_PAPC_CLIP_IDLE = 0x00000068, - PERF_PAPC_CLIP_BUSY = 0x00000069, - PERF_PAPC_SU_IDLE = 0x0000006a, - PERF_PAPC_SU_BUSY = 0x0000006b, - PERF_PAPC_SU_STARVED_CLIP = 
0x0000006c, - PERF_PAPC_SU_STALLED_SC = 0x0000006d, - PERF_PAPC_CL_DYN_SCLK_VLD = 0x0000006e, - PERF_PAPC_SU_DYN_SCLK_VLD = 0x0000006f, - PERF_PAPC_PA_REG_SCLK_VLD = 0x00000070, - PERF_PAPC_SU_SE0_PRIM_FILTER_CULL = 0x00000078, - PERF_PAPC_SU_SE1_PRIM_FILTER_CULL = 0x00000079, - PERF_PAPC_SU_SE0_OUTPUT_PRIM = 0x0000007b, - PERF_PAPC_SU_SE1_OUTPUT_PRIM = 0x0000007c, - PERF_PAPC_SU_SE0_OUTPUT_NULL_PRIM = 0x0000007e, - PERF_PAPC_SU_SE1_OUTPUT_NULL_PRIM = 0x0000007f, - PERF_PAPC_SU_SE0_STALLED_SC = 0x00000083, - PERF_PAPC_SU_SE1_STALLED_SC = 0x00000084, - PERF_PAPC_CLSM_CLIPPING_PRIM = 0x00000086, - PERF_PAPC_SU_CULLED_PRIM = 0x00000087, - PERF_PAPC_SU_OUTPUT_EOPG = 0x00000088, - PERF_PAPC_SU_SE2_PRIM_FILTER_CULL = 0x00000089, - PERF_PAPC_SU_SE3_PRIM_FILTER_CULL = 0x0000008a, - PERF_PAPC_SU_SE2_OUTPUT_PRIM = 0x0000008b, - PERF_PAPC_SU_SE3_OUTPUT_PRIM = 0x0000008c, - PERF_PAPC_SU_SE2_OUTPUT_NULL_PRIM = 0x0000008d, - PERF_PAPC_SU_SE3_OUTPUT_NULL_PRIM = 0x0000008e, - PERF_PAPC_SU_SE2_STALLED_SC = 0x00000097, - PERF_PAPC_SU_SE3_STALLED_SC = 0x00000098, - PERF_SU_SMALL_PRIM_FILTER_CULL_CNT = 0x00000099, - PERF_SMALL_PRIM_CULL_PRIM_1X1 = 0x0000009a, - PERF_SMALL_PRIM_CULL_PRIM_2X1 = 0x0000009b, - PERF_SMALL_PRIM_CULL_PRIM_1X2 = 0x0000009c, - PERF_SMALL_PRIM_CULL_PRIM_2X2 = 0x0000009d, - PERF_SMALL_PRIM_CULL_PRIM_3X1 = 0x0000009e, - PERF_SMALL_PRIM_CULL_PRIM_1X3 = 0x0000009f, - PERF_SMALL_PRIM_CULL_PRIM_3X2 = 0x000000a0, - PERF_SMALL_PRIM_CULL_PRIM_2X3 = 0x000000a1, - PERF_SMALL_PRIM_CULL_PRIM_NX1 = 0x000000a2, - PERF_SMALL_PRIM_CULL_PRIM_1XN = 0x000000a3, - PERF_SMALL_PRIM_CULL_PRIM_NX2 = 0x000000a4, - PERF_SMALL_PRIM_CULL_PRIM_2XN = 0x000000a5, - PERF_SC0_QUALIFIED_SEND_BUSY_EVENT = 0x000000a9, - PERF_SC0_QUALIFIED_SEND_NOT_BUSY_EVENT = 0x000000aa, - PERF_SC1_QUALIFIED_SEND_BUSY_EVENT = 0x000000ab, - PERF_SC1_QUALIFIED_SEND_NOT_BUSY_EVENT = 0x000000ac, - PERF_SC2_QUALIFIED_SEND_BUSY_EVENT = 0x000000ad, - PERF_SC2_QUALIFIED_SEND_NOT_BUSY_EVENT = 0x000000ae, - 
PERF_SC3_QUALIFIED_SEND_BUSY_EVENT = 0x000000af, - PERF_SC3_QUALIFIED_SEND_NOT_BUSY_EVENT = 0x000000b0, - PERF_UTC_SIDEBAND_DRIVER_WAITING_ON_UTCL1__GFX09 = 0x000000b1, - PERF_UTC_SIDEBAND_DRIVER_STALLING_CLIENT__GFX09 = 0x000000b2, - PERF_UTC_SIDEBAND_DRIVER_BUSY__GFX09 = 0x000000b3, - PERF_UTC_INDEX_DRIVER_WAITING_ON_UTCL1__GFX09 = 0x000000b4, - PERF_UTC_INDEX_DRIVER_STALLING_CLIENT__GFX09 = 0x000000b5, - PERF_UTC_INDEX_DRIVER_BUSY__GFX09 = 0x000000b6, - PERF_UTC_POSITION_DRIVER_WAITING_ON_UTCL1__GFX09 = 0x000000b7, - PERF_UTC_POSITION_DRIVER_STALLING_CLIENT__GFX09 = 0x000000b8, - PERF_UTC_POSITION_DRIVER_BUSY__GFX09 = 0x000000b9, - PERF_UTC_SIDEBAND_RECEIVER_STALLING_UTCL1__GFX09 = 0x000000ba, - PERF_UTC_SIDEBAND_RECEIVER_STALLED_BY_ARBITER__GFX09 = 0x000000bb, - PERF_UTC_SIDEBAND_RECEIVER_BUSY__GFX09 = 0x000000bc, - PERF_UTC_INDEX_RECEIVER_STALLING_UTCL1__GFX09 = 0x000000bd, - PERF_UTC_INDEX_RECEIVER_STALLED_BY_ARBITER__GFX09 = 0x000000be, - PERF_UTC_INDEX_RECEIVER_BUSY__GFX09 = 0x000000bf, - PERF_UTC_POSITION_RECEIVER_STALLING_UTCL1__GFX09 = 0x000000c0, - PERF_UTC_POSITION_RECEIVER_STALLED_BY_ARBITER__GFX09 = 0x000000c1, - PERF_UTC_POSITION_RECEIVER_BUSY__GFX09 = 0x000000c2, - PERF_TC_ARBITER_WAITING_FOR_TC_INTERFACE__GFX09 = 0x000000c3, - PERF_TCIF_STALLING_CLIENT_NO_CREDITS__GFX09 = 0x000000c4, - PERF_TCIF_BUSY__GFX09 = 0x000000c5, - PERF_TCIF_SIDEBAND_RDREQ__GFX09 = 0x000000c6, - PERF_TCIF_INDEX_RDREQ__GFX09 = 0x000000c7, - PERF_TCIF_POSITION_RDREQ__GFX09 = 0x000000c8, - PERF_SIDEBAND_WAITING_ON_UTCL1__GFX09 = 0x000000c9, - PERF_SIDEBAND_WAITING_ON_FULL_SIDEBAND_MEMORY__GFX09 = 0x000000ca, - PERF_WRITING_TO_SIDEBAND_MEMORY__GFX09 = 0x000000cb, - PERF_SIDEBAND_EXPECTING_1_POSSIBLE_VALID_DWORD__GFX09 = 0x000000cc, - PERF_SIDEBAND_EXPECTING_2_TO_15_POSSIBLE_VALID_DWORD__GFX09 = 0x000000cd, - PERF_SIDEBAND_EXPECTING_16_POSSIBLE_VALID_DWORD__GFX09 = 0x000000ce, - PERF_SIDEBAND_WAITING_ON_RETURNED_DATA__GFX09 = 0x000000cf, - PERF_SIDEBAND_POP_BIT_FIFO_FULL__GFX09 
= 0x000000d0, - PERF_SIDEBAND_FIFO_VMID_FIFO_FULL__GFX09 = 0x000000d1, - PERF_SIDEBAND_INVALID_REFETCH__GFX09 = 0x000000d2, - PERF_SIDEBAND_QUALIFIED_BUSY__GFX09 = 0x000000d3, - PERF_SIDEBAND_QUALIFIED_STARVED__GFX09 = 0x000000d4, - PERF_SIDEBAND_0_VALID_DWORDS_RECEIVED___GFX09 = 0x000000d5, - PERF_SIDEBAND_1_TO_7_VALID_DWORDS_RECEIVED___GFX09 = 0x000000d6, - PERF_SIDEBAND_8_TO_15_VALID_DWORDS_RECEIVED___GFX09 = 0x000000d7, - PERF_SIDEBAND_16_VALID_DWORDS_RECEIVED___GFX09 = 0x000000d8, - PERF_INDEX_REQUEST_WAITING_ON_TOKENS__GFX09 = 0x000000d9, - PERF_INDEX_REQUEST_WAITING_ON_FULL_RECEIVE_FIFO__GFX09 = 0x000000da, - PERF_INDEX_REQUEST_QUALIFIED_BUSY__GFX09 = 0x000000db, - PERF_INDEX_REQUEST_QUALIFIED_STARVED__GFX09 = 0x000000dc, - PERF_INDEX_RECEIVE_WAITING_ON_RETURNED_CACHELINE__GFX09 = 0x000000dd, - PERF_INDEX_RECEIVE_WAITING_ON_PRIM_INDICES_FIFO__GFX09 = 0x000000de, - PERF_INDEX_RECEIVE_PRIM_INDICES_FIFO_WRITE__GFX09 = 0x000000df, - PERF_INDEX_RECEIVE_QUALIFIED_BUSY__GFX09 = 0x000000e0, - PERF_INDEX_RECEIVE_QUALIFIED_STARVED__GFX09 = 0x000000e1, - PERF_INDEX_RECEIVE_0_VALID_DWORDS_THIS_CACHELINE__GFX09 = 0x000000e2, - PERF_INDEX_RECEIVE_1_TO_7_VALID_DWORDS_THIS_CACHELINE__GFX09_0 = 0x000000e3, - PERF_INDEX_RECEIVE_8_TO_15_VALID_DWORDS_THIS_CACHELINE__GFX09_0 = 0x000000e4, - PERF_INDEX_RECEIVE_16_VALID_DWORDS_THIS_CACHELINE__GFX09_0 = 0x000000e5, - PERF_POS_REQ_STALLED_BY_FULL_FETCH_TO_PRIMIC_P_FIFO__GFX09_0 = 0x000000e6, - PERF_POS_REQ_STALLED_BY_FULL_FETCH_TO_PRIMIC_S_FIFO__GFX09_0 = 0x000000e7, - PERF_POS_REQ_STALLED_BY_FULL_POSREQ_TO_POSRTN_V_FIFO__GFX09_0 = 0x000000e8, - PERF_POS_REQ_STALLED_BY_FULL_POSREQ_TO_POSRTN_S_FIFO__GFX09_0 = 0x000000e9, - PERF_POS_REQ_STALLED_BY_FULL_PA_TO_WD_DEALLOC_INDEX_FIFO__GFX09_0 = 0x000000ea, - PERF_POS_REQ_STALLED_BY_NO_TOKENS__GFX09_0 = 0x000000eb, - PERF_POS_REQ_STALLED_BY_NO_PRIM__GFX09_0 = 0x000000ec, - PERF_POS_REQ_STALLED_BY_UTCL1__GFX09_0 = 0x000000ed, - PERF_POS_REQ_FETCH_TO_PRIMIC_P_FIFO_WRITE__GFX09_0 = 
0x000000ee, - PERF_POS_REQ_FETCH_TO_PRIMIC_P_FIFO_NO_WRITE__GFX09_0 = 0x000000ef, - PERF_POS_REQ_QUALIFIED_BUSY__GFX09_0 = 0x000000f0, - PERF_POS_REQ_QUALIFIED_STARVED__GFX09_0 = 0x000000f1, - PERF_POS_RET_FULL_FETCH_TO_SXIF_FIFO__GFX09_0 = 0x000000f2, - PERF_POS_RET_FULL_PA_TO_WD_DEALLOC_POSITION_FIFO__GFX09_0 = 0x000000f3, - PERF_POS_RET_WAITING_ON_RETURNED_CACHELINE__GFX09_0 = 0x000000f4, - PERF_POS_RET_FETCH_TO_SXIF_FIFO_WRITE__GFX09_0 = 0x000000f5, - PERF_POS_RET_QUALIFIED_BUSY__GFX09_0 = 0x000000f6, - PERF_POS_RET_QUALIFIED_STARVED__GFX09_0 = 0x000000f7, - PERF_TC_LATENCY_BIN0__GFX09_0 = 0x000000f8, - PERF_TC_LATENCY_BIN1__GFX09_0 = 0x000000f9, - PERF_TC_LATENCY_BIN2__GFX09_0 = 0x000000fa, - PERF_TC_LATENCY_BIN3__GFX09_0 = 0x000000fb, - PERF_TC_LATENCY_BIN4__GFX09_0 = 0x000000fc, - PERF_TC_LATENCY_BIN5__GFX09_0 = 0x000000fd, - PERF_TC_LATENCY_BIN6__GFX09_0 = 0x000000fe, - PERF_TC_LATENCY_BIN7__GFX09_0 = 0x000000ff, - PERF_TC_STREAM0_DATA_AVAILABLE__GFX09_0 = 0x00000100, - PERF_TC_STREAM1_DATA_AVAILABLE__GFX09_0 = 0x00000101, - PERF_TC_STREAM2_DATA_AVAILABLE__GFX09_0 = 0x00000102, - PERF_PAWD_DEALLOC_FIFO_IS_FULL__GFX09_0 = 0x00000103, - PERF_PAWD_DEALLOC_WAITING_TO_BE_READ__GFX09_0 = 0x00000104, - PERF_SHOOTDOWN_WAIT_ON_UTCL1__GFX09_0 = 0x00000105, - PERF_SHOOTDOWN_WAIT_ON_UTC_SIDEBAND__GFX09_0 = 0x00000106, - PERF_SHOOTDOWN_WAIT_ON_UTC_INDEX__GFX09_0 = 0x00000107, - PERF_SHOOTDOWN_WAIT_ON_UTC_POSITION__GFX09_0 = 0x00000108, - PERF_SHOOTDOWN_WAIT_ALL_CLEAN__GFX09_0 = 0x00000109, - PERF_SHOOTDOWN_WAIT_DEASSERT__GFX09_0 = 0x0000010a, - PERF_UTCL1_TRANSLATION_MISS_CLIENT0__GFX09_0 = 0x0000010b, - PERF_UTCL1_TRANSLATION_MISS_CLIENT1__GFX09_0 = 0x0000010c, - PERF_UTCL1_TRANSLATION_MISS_CLIENT2__GFX09_0 = 0x0000010d, - PERF_UTCL1_PERMISSION_MISS_CLIENT0__GFX09_0 = 0x0000010e, - PERF_UTCL1_PERMISSION_MISS_CLIENT1__GFX09_0 = 0x0000010f, - PERF_UTCL1_PERMISSION_MISS_CLIENT2__GFX09_0 = 0x00000110, - PERF_UTCL1_TRANSLATION_HIT_CLIENT0__GFX09_0 = 0x00000111, - 
PERF_UTCL1_TRANSLATION_HIT_CLIENT1__GFX09_0 = 0x00000112, - PERF_UTCL1_TRANSLATION_HIT_CLIENT2__GFX09_0 = 0x00000113, - PERF_UTCL1_REQUEST_CLIENT0__GFX09_0 = 0x00000114, - PERF_UTCL1_REQUEST_CLIENT1__GFX09_0 = 0x00000115, - PERF_UTCL1_REQUEST_CLIENT2__GFX09_0 = 0x00000116, - PERF_UTCL1_STALL_MISSFIFO_FULL__GFX09_0 = 0x00000117, - PERF_UTCL1_STALL_INFLIGHT_MAX__GFX09_0 = 0x00000118, - PERF_UTCL1_STALL_LRU_INFLIGHT__GFX09_0 = 0x00000119, - PERF_UTCL1_STALL_MULTI_MISS__GFX09_0 = 0x0000011a, - PERF_UTCL1_LFIFO_FULL__GFX09_0 = 0x0000011b, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT0__GFX09_0 = 0x0000011c, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT1__GFX09_0 = 0x0000011d, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT2__GFX09_0 = 0x0000011e, - PERF_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX09_0 = 0x0000011f, - PERF_UTCL1_UTCL2_REQ__GFX09_0 = 0x00000120, - PERF_UTCL1_UTCL2_RET__GFX09_0 = 0x00000121, - PERF_UTCL1_UTCL2_INFLIGHT__GFX09_0 = 0x00000122, - PERF_CLIENT_UTCL1_INFLIGHT__GFX09_0 = 0x00000123, - PERF_PAPC_PASX_DISABLE_PIPE__GFX09_10 = 0x00000001, - PERF_PAPC_PASX_FIRST_VECTOR__GFX09_10 = 0x00000002, - PERF_PAPC_PASX_SECOND_VECTOR__GFX09_10 = 0x00000003, - PERF_PAPC_PASX_FIRST_DEAD__GFX09_10 = 0x00000004, - PERF_PAPC_PASX_SECOND_DEAD__GFX09_10 = 0x00000005, - PERF_PAPC_PA_INPUT_PRIM__GFX09_10 = 0x00000008, - PERF_PAPC_PA_INPUT_NULL_PRIM__GFX09_10 = 0x00000009, - PERF_PAPC_PA_INPUT_EVENT_FLAG__GFX09_10 = 0x0000000a, - PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT__GFX09_10 = 0x0000000b, - PERF_PAPC_PA_INPUT_END_OF_PACKET__GFX09_10 = 0x0000000c, - PERF_PAPC_PA_INPUT_EXTENDED_EVENT__GFX09_10 = 0x0000000d, - PERF_PAPC_CLPR_CLIP_PLANE_CNT_9_12__GFX09_10 = 0x0000001e, - PERF_PAPC_CLPR_GSC_KILL_CULL_PRIM__GFX09_10 = 0x00000025, - PERF_PAPC_CLSM_OUT_PRIM_CNT_9_13__GFX09_10 = 0x0000002f, - PERF_PAPC_SU_MULTI_GPU_PRIM_FILTER_CULL__GFX09_10 = 0x00000071, - PERF_PAPC_PASX_SE0_REQ__GFX09_10 = 0x00000072, - PERF_PAPC_PASX_SE1_REQ__GFX09_10 = 0x00000073, - PERF_PAPC_PASX_SE0_FIRST_VECTOR__GFX09_10 = 
0x00000074, - PERF_PAPC_PASX_SE0_SECOND_VECTOR__GFX09_10 = 0x00000075, - PERF_PAPC_PASX_SE1_FIRST_VECTOR__GFX09_10 = 0x00000076, - PERF_PAPC_PASX_SE1_SECOND_VECTOR__GFX09_10 = 0x00000077, - PERF_PAPC_SU_SE01_PRIM_FILTER_CULL__GFX09_10 = 0x0000007a, - PERF_PAPC_SU_SE01_OUTPUT_PRIM__GFX09_10 = 0x0000007d, - PERF_PAPC_SU_SE01_OUTPUT_NULL_PRIM__GFX09_10 = 0x00000080, - PERF_PAPC_SU_SE0_OUTPUT_FIRST_PRIM_SLOT__GFX09_10 = 0x00000081, - PERF_PAPC_SU_SE1_OUTPUT_FIRST_PRIM_SLOT__GFX09_10 = 0x00000082, - PERF_PAPC_SU_SE01_STALLED_SC__GFX09_10 = 0x00000085, - PERF_PAPC_SU_SE0_OUTPUT_END_OF_PACKET__GFX09_10 = 0x0000008f, - PERF_PAPC_SU_SE1_OUTPUT_END_OF_PACKET__GFX09_10 = 0x00000090, - PERF_PAPC_SU_SE2_OUTPUT_END_OF_PACKET__GFX09_10 = 0x00000091, - PERF_PAPC_SU_SE3_OUTPUT_END_OF_PACKET__GFX09_10 = 0x00000092, - PERF_PAPC_SU_SE0_OUTPUT_EOPG__GFX09_10 = 0x00000093, - PERF_PAPC_SU_SE1_OUTPUT_EOPG__GFX09_10 = 0x00000094, - PERF_PAPC_SU_SE2_OUTPUT_EOPG__GFX09_10 = 0x00000095, - PERF_PAPC_SU_SE3_OUTPUT_EOPG__GFX09_10 = 0x00000096, - PERF_SMALL_PRIM_CULL_PRIM_FULL_RES_EVENT__GFX09_10 = 0x000000a6, - PERF_SMALL_PRIM_CULL_PRIM_HALF_RES_EVENT__GFX09_10 = 0x000000a7, - PERF_SMALL_PRIM_CULL_PRIM_QUARTER_RES_EVENT__GFX09_10 = 0x000000a8, - PERF_INDEX_RECEIVE_1_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e3, - PERF_INDEX_RECEIVE_2_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e4, - PERF_INDEX_RECEIVE_3_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e5, - PERF_INDEX_RECEIVE_4_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e6, - PERF_INDEX_RECEIVE_5_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e7, - PERF_INDEX_RECEIVE_6_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e8, - PERF_INDEX_RECEIVE_7_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000e9, - PERF_INDEX_RECEIVE_8_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000ea, - PERF_INDEX_RECEIVE_9_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000eb, - PERF_INDEX_RECEIVE_10_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000ec, - 
PERF_INDEX_RECEIVE_11_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000ed, - PERF_INDEX_RECEIVE_12_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000ee, - PERF_INDEX_RECEIVE_13_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000ef, - PERF_INDEX_RECEIVE_14_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000f0, - PERF_INDEX_RECEIVE_15_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000f1, - PERF_INDEX_RECEIVE_16_VALID_DWORDS_THIS_CACHELINE__GFX09_1X = 0x000000f2, - PERF_POS_REQ_STALLED_BY_FULL_FETCH_TO_PRIMIC_P_FIFO__GFX09_1X = 0x000000f3, - PERF_POS_REQ_STALLED_BY_FULL_FETCH_TO_PRIMIC_S_FIFO__GFX09_1X = 0x000000f4, - PERF_POS_REQ_STALLED_BY_FULL_POSREQ_TO_POSRTN_V_FIFO__GFX09_1X = 0x000000f5, - PERF_POS_REQ_STALLED_BY_FULL_POSREQ_TO_POSRTN_S_FIFO__GFX09_1X = 0x000000f6, - PERF_POS_REQ_STALLED_BY_FULL_PA_TO_WD_DEALLOC_INDEX_FIFO__GFX09_1X = 0x000000f7, - PERF_POS_REQ_STALLED_BY_NO_TOKENS__GFX09_1X = 0x000000f8, - PERF_POS_REQ_STARVED_BY_NO_PRIM__GFX09_1X = 0x000000f9, - PERF_POS_REQ_STALLED_BY_UTCL1__GFX09_1X = 0x000000fa, - PERF_POS_REQ_FETCH_TO_PRIMIC_P_FIFO_WRITE__GFX09_1X = 0x000000fb, - PERF_POS_REQ_FETCH_TO_PRIMIC_P_FIFO_NO_WRITE__GFX09_1X = 0x000000fc, - PERF_POS_REQ_QUALIFIED_BUSY__GFX09_1X = 0x000000fd, - PERF_POS_REQ_QUALIFIED_STARVED__GFX09_1X = 0x000000fe, - PERF_POS_REQ_REUSE_0_NEW_VERTS_THIS_PRIM__GFX09_1X = 0x000000ff, - PERF_POS_REQ_REUSE_1_NEW_VERTS_THIS_PRIM__GFX09_1X = 0x00000100, - PERF_POS_REQ_REUSE_2_NEW_VERTS_THIS_PRIM__GFX09_1X = 0x00000101, - PERF_POS_REQ_REUSE_3_NEW_VERTS_THIS_PRIM__GFX09_1X = 0x00000102, - PERF_POS_RET_FULL_FETCH_TO_SXIF_FIFO__GFX09_1X = 0x00000103, - PERF_POS_RET_FULL_PA_TO_WD_DEALLOC_POSITION_FIFO__GFX09_1X = 0x00000104, - PERF_POS_RET_WAITING_ON_RETURNED_CACHELINE__GFX09_1X = 0x00000105, - PERF_POS_RET_FETCH_TO_SXIF_FIFO_WRITE__GFX09_1X = 0x00000106, - PERF_POS_RET_QUALIFIED_BUSY__GFX09_1X = 0x00000107, - PERF_POS_RET_QUALIFIED_STARVED__GFX09_1X = 0x00000108, - PERF_POS_RET_1_CACHELINE_POSITION_USED__GFX09_1X = 0x00000109, - 
PERF_POS_RET_2_CACHELINE_POSITION_USED__GFX09_1X = 0x0000010a, - PERF_POS_RET_3_CACHELINE_POSITION_USED__GFX09_1X = 0x0000010b, - PERF_POS_RET_4_CACHELINE_POSITION_USED__GFX09_1X = 0x0000010c, - PERF_TC_INDEX_LATENCY_BIN0__GFX09_1X = 0x0000010d, - PERF_TC_INDEX_LATENCY_BIN1__GFX09_1X = 0x0000010e, - PERF_TC_INDEX_LATENCY_BIN2__GFX09_1X = 0x0000010f, - PERF_TC_INDEX_LATENCY_BIN3__GFX09_1X = 0x00000110, - PERF_TC_INDEX_LATENCY_BIN4__GFX09_1X = 0x00000111, - PERF_TC_INDEX_LATENCY_BIN5__GFX09_1X = 0x00000112, - PERF_TC_INDEX_LATENCY_BIN6__GFX09_1X = 0x00000113, - PERF_TC_INDEX_LATENCY_BIN7__GFX09_1X = 0x00000114, - PERF_TC_INDEX_LATENCY_BIN8__GFX09_1X = 0x00000115, - PERF_TC_INDEX_LATENCY_BIN9__GFX09_1X = 0x00000116, - PERF_TC_INDEX_LATENCY_BIN10__GFX09_1X = 0x00000117, - PERF_TC_INDEX_LATENCY_BIN11__GFX09_1X = 0x00000118, - PERF_TC_INDEX_LATENCY_BIN12__GFX09_1X = 0x00000119, - PERF_TC_INDEX_LATENCY_BIN13__GFX09_1X = 0x0000011a, - PERF_TC_INDEX_LATENCY_BIN14__GFX09_1X = 0x0000011b, - PERF_TC_INDEX_LATENCY_BIN15__GFX09_1X = 0x0000011c, - PERF_TC_POSITION_LATENCY_BIN0__GFX09_1X = 0x0000011d, - PERF_TC_POSITION_LATENCY_BIN1__GFX09_1X = 0x0000011e, - PERF_TC_POSITION_LATENCY_BIN2__GFX09_1X = 0x0000011f, - PERF_TC_POSITION_LATENCY_BIN3__GFX09_1X = 0x00000120, - PERF_TC_POSITION_LATENCY_BIN4__GFX09_1X = 0x00000121, - PERF_TC_POSITION_LATENCY_BIN5__GFX09_1X = 0x00000122, - PERF_TC_POSITION_LATENCY_BIN6__GFX09_1X = 0x00000123, - PERF_TC_POSITION_LATENCY_BIN7__GFX09_1X = 0x00000124, - PERF_TC_POSITION_LATENCY_BIN8__GFX09_1X = 0x00000125, - PERF_TC_POSITION_LATENCY_BIN9__GFX09_1X = 0x00000126, - PERF_TC_POSITION_LATENCY_BIN10__GFX09_1X = 0x00000127, - PERF_TC_POSITION_LATENCY_BIN11__GFX09_1X = 0x00000128, - PERF_TC_POSITION_LATENCY_BIN12__GFX09_1X = 0x00000129, - PERF_TC_POSITION_LATENCY_BIN13__GFX09_1X = 0x0000012a, - PERF_TC_POSITION_LATENCY_BIN14__GFX09_1X = 0x0000012b, - PERF_TC_POSITION_LATENCY_BIN15__GFX09_1X = 0x0000012c, - PERF_TC_STREAM0_DATA_AVAILABLE__GFX09_1X = 
0x0000012d, - PERF_TC_STREAM1_DATA_AVAILABLE__GFX09_1X = 0x0000012e, - PERF_TC_STREAM2_DATA_AVAILABLE__GFX09_1X = 0x0000012f, - PERF_PAWD_DEALLOC_FIFO_IS_FULL__GFX09_1X = 0x00000130, - PERF_PAWD_DEALLOC_WAITING_TO_BE_READ__GFX09_1X = 0x00000131, - PERF_SHOOTDOWN_WAIT_ON_UTCL1__GFX09_1X = 0x00000132, - PERF_SHOOTDOWN_WAIT_ON_UTC_SIDEBAND__GFX09_1X = 0x00000133, - PERF_SHOOTDOWN_WAIT_ON_UTC_INDEX__GFX09_1X = 0x00000134, - PERF_SHOOTDOWN_WAIT_ON_UTC_POSITION__GFX09_1X = 0x00000135, - PERF_SHOOTDOWN_WAIT_ALL_CLEAN__GFX09_1X = 0x00000136, - PERF_SHOOTDOWN_WAIT_DEASSERT__GFX09_1X = 0x00000137, - PERF_UTCL1_TRANSLATION_MISS_CLIENT0__GFX09_1X = 0x00000138, - PERF_UTCL1_TRANSLATION_MISS_CLIENT1__GFX09_1X = 0x00000139, - PERF_UTCL1_TRANSLATION_MISS_CLIENT2__GFX09_1X = 0x0000013a, - PERF_UTCL1_PERMISSION_MISS_CLIENT0__GFX09_1X = 0x0000013b, - PERF_UTCL1_PERMISSION_MISS_CLIENT1__GFX09_1X = 0x0000013c, - PERF_UTCL1_PERMISSION_MISS_CLIENT2__GFX09_1X = 0x0000013d, - PERF_UTCL1_TRANSLATION_HIT_CLIENT0__GFX09_1X = 0x0000013e, - PERF_UTCL1_TRANSLATION_HIT_CLIENT1__GFX09_1X = 0x0000013f, - PERF_UTCL1_TRANSLATION_HIT_CLIENT2__GFX09_1X = 0x00000140, - PERF_UTCL1_REQUEST_CLIENT0__GFX09_1X = 0x00000141, - PERF_UTCL1_REQUEST_CLIENT1__GFX09_1X = 0x00000142, - PERF_UTCL1_REQUEST_CLIENT2__GFX09_1X = 0x00000143, - PERF_UTCL1_STALL_MISSFIFO_FULL__GFX09_1X = 0x00000144, - PERF_UTCL1_STALL_INFLIGHT_MAX__GFX09_1X = 0x00000145, - PERF_UTCL1_STALL_LRU_INFLIGHT__GFX09_1X = 0x00000146, - PERF_UTCL1_STALL_MULTI_MISS__GFX09_1X = 0x00000147, - PERF_UTCL1_LFIFO_FULL__GFX09_1X = 0x00000148, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT0__GFX09_1X = 0x00000149, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT1__GFX09_1X = 0x0000014a, - PERF_UTCL1_STALL_LFIFO_NOT_RES_CLIENT2__GFX09_1X = 0x0000014b, - PERF_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX09_1X = 0x0000014c, - PERF_UTCL1_UTCL2_REQ__GFX09_1X = 0x0000014d, - PERF_UTCL1_UTCL2_RET__GFX09_1X = 0x0000014e, - PERF_UTCL1_UTCL2_INFLIGHT__GFX09_1X = 0x0000014f, - 
PERF_CLIENT_UTCL1_INFLIGHT__GFX09_1X = 0x00000150, - PERF_PA_SE0_OUTPUT_QUALIFIED_CLKEN_NOT_ASSERTED__GFX09_1X = 0x00000151, - PERF_PA_SE0_OUTPUT_QUALIFIED_CLKEN_ASSERTED_NO_SEND__GFX09_1X = 0x00000152, - PERF_PA_SE0_OUTPUT_QUALIFIED_CLKEN_ASSERTED_WITH_SEND__GFX09_1X = 0x00000153, - PERF_PA_SE1_OUTPUT_QUALIFIED_CLKEN_NOT_ASSERTED__GFX09_1X = 0x00000154, - PERF_PA_SE1_OUTPUT_QUALIFIED_CLKEN_ASSERTED_NO_SEND__GFX09_1X = 0x00000155, - PERF_PA_SE1_OUTPUT_QUALIFIED_CLKEN_ASSERTED_WITH_SEND__GFX09_1X = 0x00000156, - PERF_PA_SE2_OUTPUT_QUALIFIED_CLKEN_NOT_ASSERTED__GFX09_1X = 0x00000157, - PERF_PA_SE2_OUTPUT_QUALIFIED_CLKEN_ASSERTED_NO_SEND__GFX09_1X = 0x00000158, - PERF_PA_SE2_OUTPUT_QUALIFIED_CLKEN_ASSERTED_WITH_SEND__GFX09_1X = 0x00000159, - PERF_PA_SE3_OUTPUT_QUALIFIED_CLKEN_NOT_ASSERTED__GFX09_1X = 0x0000015a, - PERF_PA_SE3_OUTPUT_QUALIFIED_CLKEN_ASSERTED_NO_SEND__GFX09_1X = 0x0000015b, - PERF_PA_SE3_OUTPUT_QUALIFIED_CLKEN_ASSERTED_WITH_SEND__GFX09_1X = 0x0000015c, - PERF_PA_VERTEX_FIFO_FULL__GFX09_1X = 0x0000015d, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X = 0x0000015e, - PERF_PA_FETCH_TO_SXIF_FIFO_FULL__GFX10 = 0x000000b4, - PERF_ENGG_CSB_SPI_INPUT_FIFO_FULL__GFX10 = 0x000000bb, - PERF_ENGG_CSB_DELAY_BIN00__GFX10 = 0x000000c5, - PERF_ENGG_CSB_DELAY_BIN01__GFX10 = 0x000000c6, - PERF_ENGG_CSB_DELAY_BIN02__GFX10 = 0x000000c7, - PERF_ENGG_CSB_DELAY_BIN03__GFX10 = 0x000000c8, - PERF_ENGG_CSB_DELAY_BIN04__GFX10 = 0x000000c9, - PERF_ENGG_CSB_DELAY_BIN05__GFX10 = 0x000000ca, - PERF_ENGG_CSB_DELAY_BIN06__GFX10 = 0x000000cb, - PERF_ENGG_CSB_DELAY_BIN07__GFX10 = 0x000000cc, - PERF_ENGG_CSB_DELAY_BIN08__GFX10 = 0x000000cd, - PERF_ENGG_CSB_DELAY_BIN09__GFX10 = 0x000000ce, - PERF_ENGG_CSB_DELAY_BIN10__GFX10 = 0x000000cf, - PERF_ENGG_CSB_DELAY_BIN11__GFX10 = 0x000000d0, - PERF_ENGG_CSB_DELAY_BIN12__GFX10 = 0x000000d1, - PERF_ENGG_CSB_DELAY_BIN13__GFX10 = 0x000000d2, - PERF_ENGG_CSB_DELAY_BIN14__GFX10 = 0x000000d3, - PERF_ENGG_CSB_DELAY_BIN15__GFX10 = 0x000000d4, - 
PERF_ENGG_CSB_SPI_DELAY_BIN00__GFX10 = 0x000000d5, - PERF_ENGG_CSB_SPI_DELAY_BIN01__GFX10 = 0x000000d6, - PERF_ENGG_CSB_SPI_DELAY_BIN02__GFX10 = 0x000000d7, - PERF_ENGG_CSB_SPI_DELAY_BIN03__GFX10 = 0x000000d8, - PERF_ENGG_CSB_SPI_DELAY_BIN04__GFX10 = 0x000000d9, - PERF_ENGG_CSB_SPI_DELAY_BIN05__GFX10 = 0x000000da, - PERF_ENGG_CSB_SPI_DELAY_BIN06__GFX10 = 0x000000db, - PERF_ENGG_CSB_SPI_DELAY_BIN07__GFX10 = 0x000000dc, - PERF_ENGG_CSB_SPI_DELAY_BIN08__GFX10 = 0x000000dd, - PERF_ENGG_CSB_SPI_DELAY_BIN09__GFX10 = 0x000000de, - PERF_ENGG_CSB_SPI_DELAY_BIN10__GFX10 = 0x000000df, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_0_NULL_PRIMS__GFX10 = 0x000000fd, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_1_NULL_PRIMS__GFX10 = 0x000000fe, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_2_NULL_PRIMS__GFX10 = 0x000000ff, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_3_NULL_PRIMS__GFX10 = 0x00000100, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_4_NULL_PRIMS__GFX10 = 0x00000101, - PERF_ENGG_CSB_SPI_DELAY_BIN11__GFX101 = 0x000000e0, - PERF_ENGG_CSB_SPI_DELAY_BIN12__GFX101 = 0x000000e1, - PERF_ENGG_CSB_SPI_DELAY_BIN13__GFX101 = 0x000000e2, - PERF_ENGG_CSB_SPI_DELAY_BIN14__GFX101 = 0x000000e3, - PERF_ENGG_CSB_SPI_DELAY_BIN15__GFX101 = 0x000000e4, - PERF_ENGG_POS_REQ_STALLED_BY_FULL_CLIPV_FIFO__GFX101 = 0x00000109, - PERF_ENGG_INDEX_REQ_0_NEW_VERTS_THIS_PRIM__GFX103DERIVATIVE = 0x000000e1, - PERF_ENGG_INDEX_REQ_1_NEW_VERTS_THIS_PRIM__GFX103DERIVATIVE = 0x000000e2, - PERF_ENGG_INDEX_REQ_2_NEW_VERTS_THIS_PRIM__GFX103DERIVATIVE = 0x000000e3, - PERF_ENGG_INDEX_REQ_3_NEW_VERTS_THIS_PRIM__GFX103DERIVATIVE = 0x000000e4, - PERF_ENGG_CSB_PAYLOAD_INPUT_FIFO_FULL__GFX103PLUSEXCLUSIVE = 0x000000bc, - PERF_ENGG_INDEX_REQ_NULL_REQUEST__GFX103PLUSEXCLUSIVE = 0x000000e0, - PERF_ENGG_INDEX_RET_SXRX_NULL_DROPPER_STALLED_BY_FULL_PRIM_FIFO__GFX103PLUSEXCLUSIVE = 0x00000109, - PERF_ENGG_BUSY__GFX103PLUSEXCLUSIVE = 0x0000010a, - PERF_CLIPSM_CULL_PRIMS_CNT__GFX103PLUSEXCLUSIVE = 0x0000010b, - 
PERF_PH_SEND_1_SC__GFX103PLUSEXCLUSIVE = 0x0000010c, - PERF_PH_SEND_2_SC__GFX103PLUSEXCLUSIVE = 0x0000010d, - PERF_PH_SEND_3_SC__GFX103PLUSEXCLUSIVE = 0x0000010e, - PERF_PH_SEND_4_SC__GFX103PLUSEXCLUSIVE = 0x0000010f, - PERF_OUTPUT_PRIM_1_SC__GFX103PLUSEXCLUSIVE = 0x00000110, - PERF_OUTPUT_PRIM_2_SC__GFX103PLUSEXCLUSIVE = 0x00000111, - PERF_OUTPUT_PRIM_3_SC__GFX103PLUSEXCLUSIVE = 0x00000112, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE = 0x00000113, - PERF_PA_VERTEX_FIFO_FULL__GFX10PLUS = 0x000000b1, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX10PLUS = 0x000000b2, - PERF_PA_FETCH_TO_PRIMIC_P_FIFO_FULL__GFX10PLUS = 0x000000b3, - PERF_ENGG_CSB_MACHINE_IS_STARVED__GFX10PLUS = 0x000000b7, - PERF_ENGG_CSB_MACHINE_STALLED_BY_CSB_MEMORY__GFX10PLUS = 0x000000b8, - PERF_ENGG_CSB_MACHINE_STALLED_BY_SPI__GFX10PLUS = 0x000000b9, - PERF_ENGG_CSB_GE_INPUT_FIFO_FULL__GFX10PLUS = 0x000000ba, - PERF_ENGG_CSB_GE_INPUT_FIFO_POP_BIT__GFX10PLUS = 0x000000bd, - PERF_ENGG_CSB_PRIM_COUNT_EQ0__GFX10PLUS = 0x000000be, - PERF_ENGG_CSB_NULL_SUBGROUP__GFX10PLUS = 0x000000bf, - PERF_ENGG_CSB_GE_SENDING_SUBGROUP__GFX10PLUS = 0x000000c0, - PERF_ENGG_CSB_GE_MEMORY_FULL__GFX10PLUS = 0x000000c1, - PERF_ENGG_CSB_GE_MEMORY_EMPTY__GFX10PLUS = 0x000000c2, - PERF_ENGG_CSB_SPI_MEMORY_FULL__GFX10PLUS = 0x000000c3, - PERF_ENGG_CSB_SPI_MEMORY_EMPTY__GFX10PLUS = 0x000000c4, - PERF_ENGG_INDEX_REQ_STARVED__GFX10PLUS = 0x000000e5, - PERF_ENGG_INDEX_REQ_IDLE_AND_STALLED_BY_REQ2RTN_FIFO_FULL__GFX10PLUS = 0x000000e6, - PERF_ENGG_INDEX_REQ_BUSY_AND_STALLED_BY_REQ2RTN_FIFO_FULL__GFX10PLUS = 0x000000e7, - PERF_ENGG_INDEX_REQ_STALLED_BY_SX_CREDITS__GFX10PLUS = 0x000000e8, - PERF_ENGG_INDEX_RET_REQ2RTN_FIFO_FULL__GFX10PLUS = 0x000000e9, - PERF_ENGG_INDEX_RET_REQ2RTN_FIFO_EMPTY__GFX10PLUS = 0x000000ea, - PERF_ENGG_INDEX_RET_SX_RECEIVE_FIFO_FULL__GFX10PLUS = 0x000000eb, - PERF_ENGG_INDEX_RET_SXRX_STARVED_BY_CSB__GFX10PLUS = 0x000000ec, - PERF_ENGG_INDEX_RET_SXRX_STARVED_BY_PRIMS__GFX10PLUS = 0x000000ed, - 
PERF_ENGG_INDEX_RET_SXRX_STALLED_BY_PRIM_INDICES_CSB_FIFO__GFX10PLUS = 0x000000ee, - PERF_ENGG_INDEX_RET_SXRX_STALLED_BY_PRIM_INDICES_FIFO__GFX10PLUS = 0x000000ef, - PERF_ENGG_INDEX_RET_SXRX_READING_EVENT__GFX10PLUS = 0x000000f0, - PERF_ENGG_INDEX_RET_SXRX_READING_NULL_SUBGROUP__GFX10PLUS = 0x000000f1, - PERF_ENGG_INDEX_RET_SXRX_READING_SUBGROUP_PRIMCOUNT_EQ0__GFX10PLUS = 0x000000f2, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_0_VALID_PRIMS_NOPL__GFX10PLUS = 0x000000f3, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_1_VALID_PRIMS_NOPL__GFX10PLUS = 0x000000f4, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_2_VALID_PRIMS_NOPL__GFX10PLUS = 0x000000f5, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_3_VALID_PRIMS_NOPL__GFX10PLUS = 0x000000f6, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_4_VALID_PRIMS_NOPL__GFX10PLUS = 0x000000f7, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_0_VALID_PRIMS_PL__GFX10PLUS = 0x000000f8, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_1_VALID_PRIMS_PL__GFX10PLUS = 0x000000f9, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_2_VALID_PRIMS_PL__GFX10PLUS = 0x000000fa, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_3_VALID_PRIMS_PL__GFX10PLUS = 0x000000fb, - PERF_ENGG_INDEX_RET_SXRX_READING_QDWORD_4_VALID_PRIMS_PL__GFX10PLUS = 0x000000fc, - PERF_ENGG_INDEX_PRIM_IF_STALLED_BY_FULL_FETCH_TO_PRIMIC_P_FIFO__GFX10PLUS = 0x00000102, - PERF_ENGG_INDEX_PRIM_IF_STALLED_BY_FULL_FETCH_TO_PRIMIC_S_FIFO__GFX10PLUS = 0x00000103, - PERF_ENGG_INDEX_PRIM_IF_STARVED_BY_NO_CSB__GFX10PLUS = 0x00000104, - PERF_ENGG_INDEX_PRIM_IF_STARVED_BY_NO_PRIM__GFX10PLUS = 0x00000105, - PERF_ENGG_INDEX_PRIM_IF_FETCH_TO_PRIMIC_P_FIFO_WRITE__GFX10PLUS = 0x00000106, - PERF_ENGG_INDEX_PRIM_IF_FETCH_TO_PRIMIC_P_FIFO_NO_WRITE__GFX10PLUS = 0x00000107, - PERF_ENGG_POS_REQ_STARVED__GFX10PLUS = 0x00000108, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - PERF_CLPR_INPUT_PRIM__GFX11 = 0x00000008, - PERF_CLPR_INPUT_NULL_PRIM__GFX11 = 0x00000009, - PERF_CLPR_INPUT_EVENT__GFX11 = 0x0000000a, - 
PERF_CLPR_INPUT_FIRST_OF_SUBGROUP__GFX11 = 0x0000000b, - PERF_CLPR_INPUT_END_OF_PACKET__GFX11 = 0x0000000c, - PERF_CLPR_INPUT_EXTENDED_EVENT__GFX11 = 0x0000000d, - PERF_CLPR_CLIP_PLANE_CNT_9_PLUS__GFX11 = 0x0000001e, - PERF_PAPC_CLSM_OUT_PRIM_CNT_9_PLUS__GFX11 = 0x0000002f, - PERF_PAPC_SU_ALL_OUTPUT_PRIM__GFX11 = 0x0000007d, - PERF_PAPC_SU_ALL_OUTPUT_NULL_PRIM__GFX11 = 0x00000080, - PERF_PAPC_SU_ALL_STALLED_SC__GFX11 = 0x00000085, - PERF_ENGG_INDEX_RET_0_NEW_VERTS_THIS_PRIM__GFX11 = 0x000000e1, - PERF_ENGG_INDEX_RET_1_NEW_VERTS_THIS_PRIM__GFX11 = 0x000000e2, - PERF_ENGG_INDEX_RET_2_NEW_VERTS_THIS_PRIM__GFX11 = 0x000000e3, - PERF_ENGG_INDEX_RET_3_NEW_VERTS_THIS_PRIM__GFX11 = 0x000000e4, - PERF_PASX_POS_VECTOR__GFX11 = 0x00000114, - PERF_PASX_MISC_VECTOR__GFX11 = 0x00000115, - PERF_PASX_CCDIST0_VECTOR__GFX11 = 0x00000116, - PERF_PASX_CCDIST1_VECTOR__GFX11 = 0x00000117, - PERF_PASX_STEREO_POS_VECTOR__GFX11 = 0x00000118, - PERF_CLPR_INPUT_SEND__GFX11 = 0x00000119, - PERF_SU_INPUT_SEND__GFX11 = 0x0000011a, - PERF_SU_OUTPUT_SEND__GFX11 = 0x0000011b, - PERF_PAPC_SU_SE4_PRIM_FILTER_CULL__GFX11 = 0x0000011c, - PERF_PAPC_SU_SE5_PRIM_FILTER_CULL__GFX11 = 0x0000011d, - PERF_PAPC_SU_SE4_OUTPUT_PRIM__GFX11 = 0x0000011e, - PERF_PAPC_SU_SE5_OUTPUT_PRIM__GFX11 = 0x0000011f, - PERF_PAPC_SU_SE4_OUTPUT_NULL_PRIM__GFX11 = 0x00000120, - PERF_PAPC_SU_SE5_OUTPUT_NULL_PRIM__GFX11 = 0x00000121, - PERF_PAPC_SU_SE4_STALLED_SC__GFX11 = 0x00000122, - PERF_PAPC_SU_SE5_STALLED_SC__GFX11 = 0x00000123, - PERF_ENGG_INDEX_RET0_NEW_VERTS__GFX11 = 0x00000124, - PERF_ENGG_INDEX_RET1_NEW_VERTS__GFX11 = 0x00000125, - PERF_ENGG_INDEX_RET2_NEW_VERTS__GFX11 = 0x00000126, - PERF_ENGG_INDEX_RET3_NEW_VERTS__GFX11 = 0x00000127, - PERF_ENGG_INDEX_RET4_NEW_VERTS__GFX11 = 0x00000128, - PERF_ENGG_INDEX_RET5_NEW_VERTS__GFX11 = 0x00000129, - PERF_ENGG_INDEX_RET6_NEW_VERTS__GFX11 = 0x0000012a, - PERF_ENGG_INDEX_RET7_NEW_VERTS__GFX11 = 0x0000012b, - PERF_ENGG_INDEX_RET8_NEW_VERTS__GFX11 = 0x0000012c, - 
PERF_ENGG_INDEX_RET9_NEW_VERTS__GFX11 = 0x0000012d, - PERF_ENGG_INDEX_RET10_NEW_VERTS__GFX11 = 0x0000012e, - PERF_ENGG_INDEX_RET11_NEW_VERTS__GFX11 = 0x0000012f, - PERF_ENGG_INDEX_RET12_NEW_VERTS__GFX11 = 0x00000130, - PERF_PH_SEND_5_SC__GFX11 = 0x00000131, - PERF_PH_SEND_6_SC__GFX11 = 0x00000132, - PERF_OUTPUT_PRIM_5_SC__GFX11 = 0x00000133, - PERF_OUTPUT_PRIM_6_SC__GFX11 = 0x00000134, - PERF_CLPR_BACK_PRIM__GFX11 = 0x00000135, - PERF_PA_BUSY__GFX11 = 0x00000136, -#endif -} SU_PERFCNT_SEL; - -constexpr unsigned int MaxSuPerfcntSelGfx09_0 = PERF_CLIENT_UTCL1_INFLIGHT__GFX09_0; -constexpr unsigned int MaxSuPerfcntSelGfx09_1x = PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X; -constexpr unsigned int MaxSuPerfcntSelGfx101 = PERF_ENGG_POS_REQ_STALLED_BY_FULL_CLIPV_FIFO__GFX101; -constexpr unsigned int MaxSuPerfcntSelGfx103Derivative = PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxSuPerfcntSelGfx11 = PERF_PA_BUSY__GFX11; -#endif - -typedef enum SWIZZLE_MODE_ENUM { - SW_LINEAR = 0x00000000, - SW_256B_S = 0x00000001, - SW_256B_D = 0x00000002, - SW_256B_R = 0x00000003, - SW_4KB_Z = 0x00000004, - SW_4KB_S = 0x00000005, - SW_4KB_D = 0x00000006, - SW_4KB_R = 0x00000007, - SW_64KB_Z = 0x00000008, - SW_64KB_S = 0x00000009, - SW_64KB_D = 0x0000000a, - SW_64KB_R = 0x0000000b, - SW_64KB_Z_T = 0x00000010, - SW_64KB_S_T = 0x00000011, - SW_64KB_D_T = 0x00000012, - SW_64KB_R_T = 0x00000013, - SW_4KB_Z_X = 0x00000014, - SW_4KB_S_X = 0x00000015, - SW_4KB_D_X = 0x00000016, - SW_4KB_R_X = 0x00000017, - SW_64KB_Z_X = 0x00000018, - SW_64KB_S_X = 0x00000019, - SW_64KB_D_X = 0x0000001a, - SW_64KB_R_X = 0x0000001b, - SW_VAR_Z__GFX09 = 0x0000000c, - SW_VAR_S__GFX09 = 0x0000000d, - SW_VAR_D__GFX09 = 0x0000000e, - SW_VAR_R__GFX09 = 0x0000000f, - SW_VAR_S_X__GFX09 = 0x0000001d, - SW_VAR_D_X__GFX09 = 0x0000001e, - SW_VAR_Z_X__GFX09_10 = 0x0000001c, - SW_VAR_R_X__GFX09_10 = 0x0000001f, - 
SW_VAR_Z__GFX10CORE = 0x0000000c, - SW_VAR_S__GFX10CORE = 0x0000000d, - SW_VAR_D__GFX10CORE = 0x0000000e, - SW_VAR_R__GFX10CORE = 0x0000000f, - SW_VAR_S_X__GFX10CORE = 0x0000001d, - SW_VAR_D_X__GFX10CORE = 0x0000001e, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SW_256KB_Z__GFX11 = 0x0000000c, - SW_256KB_S__GFX11 = 0x0000000d, - SW_256KB_D__GFX11 = 0x0000000e, - SW_256KB_R__GFX11 = 0x0000000f, - SW_256KB_Z_X__GFX11 = 0x0000001c, - SW_256KB_S_X__GFX11 = 0x0000001d, - SW_256KB_D_X__GFX11 = 0x0000001e, - SW_256KB_R_X__GFX11 = 0x0000001f, -#endif -} SWIZZLE_MODE_ENUM; - -typedef enum SWIZZLE_TYPE_ENUM { - SW_Z = 0x00000000, - SW_S = 0x00000001, - SW_D = 0x00000002, - SW_R = 0x00000003, - SW_L = 0x00000004, -} SWIZZLE_TYPE_ENUM; - -typedef enum SX_BLEND_OPT { - BLEND_OPT_PRESERVE_NONE_IGNORE_ALL = 0x00000000, - BLEND_OPT_PRESERVE_ALL_IGNORE_NONE = 0x00000001, - BLEND_OPT_PRESERVE_C1_IGNORE_C0 = 0x00000002, - BLEND_OPT_PRESERVE_C0_IGNORE_C1 = 0x00000003, - BLEND_OPT_PRESERVE_A1_IGNORE_A0 = 0x00000004, - BLEND_OPT_PRESERVE_A0_IGNORE_A1 = 0x00000005, - BLEND_OPT_PRESERVE_NONE_IGNORE_A0 = 0x00000006, - BLEND_OPT_PRESERVE_NONE_IGNORE_NONE = 0x00000007, -} SX_BLEND_OPT; - -typedef enum SX_DOWNCONVERT_FORMAT { - SX_RT_EXPORT_NO_CONVERSION = 0x00000000, - SX_RT_EXPORT_32_R = 0x00000001, - SX_RT_EXPORT_32_A = 0x00000002, - SX_RT_EXPORT_10_11_11 = 0x00000003, - SX_RT_EXPORT_2_10_10_10 = 0x00000004, - SX_RT_EXPORT_8_8_8_8 = 0x00000005, - SX_RT_EXPORT_5_6_5 = 0x00000006, - SX_RT_EXPORT_1_5_5_5 = 0x00000007, - SX_RT_EXPORT_4_4_4_4 = 0x00000008, - SX_RT_EXPORT_16_16_GR = 0x00000009, - SX_RT_EXPORT_16_16_AR = 0x0000000a, - SX_RT_EXPORT_2_10_10_10_7E3__GFX103COREPLUS = 0x0000000c, - SX_RT_EXPORT_2_10_10_10_6E4__GFX103COREPLUS = 0x0000000d, - SX_RT_EXPORT_9_9_9_E5__GFX103PLUSEXCLUSIVE = 0x0000000b, -} SX_DOWNCONVERT_FORMAT; - -typedef enum SX_OPT_COMB_FCN { - OPT_COMB_NONE = 0x00000000, - OPT_COMB_ADD = 0x00000001, - OPT_COMB_SUBTRACT = 0x00000002, - 
OPT_COMB_MIN = 0x00000003, - OPT_COMB_MAX = 0x00000004, - OPT_COMB_REVSUBTRACT = 0x00000005, - OPT_COMB_BLEND_DISABLED = 0x00000006, - OPT_COMB_SAFE_ADD = 0x00000007, -} SX_OPT_COMB_FCN; - -typedef enum SX_PERFCOUNTER_VALS { - SX_PERF_SEL_PA_IDLE_CYCLES = 0x00000000, - SX_PERF_SEL_PA_REQ = 0x00000001, - SX_PERF_SEL_PA_POS = 0x00000002, - SX_PERF_SEL_CLOCK = 0x00000003, - SX_PERF_SEL_GATE_EN1 = 0x00000004, - SX_PERF_SEL_GATE_EN2 = 0x00000005, - SX_PERF_SEL_GATE_EN3 = 0x00000006, - SX_PERF_SEL_GATE_EN4 = 0x00000007, - SX_PERF_SEL_SH_POS_STARVE = 0x00000008, - SX_PERF_SEL_SH_COLOR_STARVE = 0x00000009, - SX_PERF_SEL_SH_POS_STALL = 0x0000000a, - SX_PERF_SEL_SH_COLOR_STALL = 0x0000000b, - SX_PERF_SEL_DB0_PIXELS = 0x0000000c, - SX_PERF_SEL_DB0_HALF_QUADS = 0x0000000d, - SX_PERF_SEL_DB0_PIXEL_STALL = 0x0000000e, - SX_PERF_SEL_DB0_PIXEL_IDLE = 0x0000000f, - SX_PERF_SEL_DB0_PRED_PIXELS = 0x00000010, - SX_PERF_SEL_DB1_PIXELS = 0x00000011, - SX_PERF_SEL_DB1_HALF_QUADS = 0x00000012, - SX_PERF_SEL_DB1_PIXEL_STALL = 0x00000013, - SX_PERF_SEL_DB1_PIXEL_IDLE = 0x00000014, - SX_PERF_SEL_DB1_PRED_PIXELS = 0x00000015, - SX_PERF_SEL_DB2_PIXELS = 0x00000016, - SX_PERF_SEL_DB2_HALF_QUADS = 0x00000017, - SX_PERF_SEL_DB2_PIXEL_STALL = 0x00000018, - SX_PERF_SEL_DB2_PIXEL_IDLE = 0x00000019, - SX_PERF_SEL_DB2_PRED_PIXELS = 0x0000001a, - SX_PERF_SEL_DB3_PIXELS = 0x0000001b, - SX_PERF_SEL_DB3_HALF_QUADS = 0x0000001c, - SX_PERF_SEL_DB3_PIXEL_STALL = 0x0000001d, - SX_PERF_SEL_DB3_PIXEL_IDLE = 0x0000001e, - SX_PERF_SEL_DB3_PRED_PIXELS = 0x0000001f, - SX_PERF_SEL_COL_BUSY = 0x00000020, - SX_PERF_SEL_POS_BUSY = 0x00000021, - SX_PERF_SEL_DB0_A2M_DISCARD_QUADS__GFX09_10 = 0x00000022, - SX_PERF_SEL_DB0_MRT0_BLEND_BYPASS__GFX09_10 = 0x00000023, - SX_PERF_SEL_DB0_MRT0_DONT_RD_DEST__GFX09_10 = 0x00000024, - SX_PERF_SEL_DB0_MRT0_DISCARD_SRC__GFX09_10 = 0x00000025, - SX_PERF_SEL_DB0_MRT0_SINGLE_QUADS__GFX09_10 = 0x00000026, - SX_PERF_SEL_DB0_MRT0_DOUBLE_QUADS__GFX09_10 = 0x00000027, - 
SX_PERF_SEL_DB0_MRT1_BLEND_BYPASS__GFX09_10 = 0x00000028, - SX_PERF_SEL_DB0_MRT1_DONT_RD_DEST__GFX09_10 = 0x00000029, - SX_PERF_SEL_DB0_MRT1_DISCARD_SRC__GFX09_10 = 0x0000002a, - SX_PERF_SEL_DB0_MRT1_SINGLE_QUADS__GFX09_10 = 0x0000002b, - SX_PERF_SEL_DB0_MRT1_DOUBLE_QUADS__GFX09_10 = 0x0000002c, - SX_PERF_SEL_DB0_MRT2_BLEND_BYPASS__GFX09_10 = 0x0000002d, - SX_PERF_SEL_DB0_MRT2_DONT_RD_DEST__GFX09_10 = 0x0000002e, - SX_PERF_SEL_DB0_MRT2_DISCARD_SRC__GFX09_10 = 0x0000002f, - SX_PERF_SEL_DB0_MRT2_SINGLE_QUADS__GFX09_10 = 0x00000030, - SX_PERF_SEL_DB0_MRT2_DOUBLE_QUADS__GFX09_10 = 0x00000031, - SX_PERF_SEL_DB0_MRT3_BLEND_BYPASS__GFX09_10 = 0x00000032, - SX_PERF_SEL_DB0_MRT3_DONT_RD_DEST__GFX09_10 = 0x00000033, - SX_PERF_SEL_DB0_MRT3_DISCARD_SRC__GFX09_10 = 0x00000034, - SX_PERF_SEL_DB0_MRT3_SINGLE_QUADS__GFX09_10 = 0x00000035, - SX_PERF_SEL_DB0_MRT3_DOUBLE_QUADS__GFX09_10 = 0x00000036, - SX_PERF_SEL_DB0_MRT4_BLEND_BYPASS__GFX09_10 = 0x00000037, - SX_PERF_SEL_DB0_MRT4_DONT_RD_DEST__GFX09_10 = 0x00000038, - SX_PERF_SEL_DB0_MRT4_DISCARD_SRC__GFX09_10 = 0x00000039, - SX_PERF_SEL_DB0_MRT4_SINGLE_QUADS__GFX09_10 = 0x0000003a, - SX_PERF_SEL_DB0_MRT4_DOUBLE_QUADS__GFX09_10 = 0x0000003b, - SX_PERF_SEL_DB0_MRT5_BLEND_BYPASS__GFX09_10 = 0x0000003c, - SX_PERF_SEL_DB0_MRT5_DONT_RD_DEST__GFX09_10 = 0x0000003d, - SX_PERF_SEL_DB0_MRT5_DISCARD_SRC__GFX09_10 = 0x0000003e, - SX_PERF_SEL_DB0_MRT5_SINGLE_QUADS__GFX09_10 = 0x0000003f, - SX_PERF_SEL_DB0_MRT5_DOUBLE_QUADS__GFX09_10 = 0x00000040, - SX_PERF_SEL_DB0_MRT6_BLEND_BYPASS__GFX09_10 = 0x00000041, - SX_PERF_SEL_DB0_MRT6_DONT_RD_DEST__GFX09_10 = 0x00000042, - SX_PERF_SEL_DB0_MRT6_DISCARD_SRC__GFX09_10 = 0x00000043, - SX_PERF_SEL_DB0_MRT6_SINGLE_QUADS__GFX09_10 = 0x00000044, - SX_PERF_SEL_DB0_MRT6_DOUBLE_QUADS__GFX09_10 = 0x00000045, - SX_PERF_SEL_DB0_MRT7_BLEND_BYPASS__GFX09_10 = 0x00000046, - SX_PERF_SEL_DB0_MRT7_DONT_RD_DEST__GFX09_10 = 0x00000047, - SX_PERF_SEL_DB0_MRT7_DISCARD_SRC__GFX09_10 = 0x00000048, - 
SX_PERF_SEL_DB0_MRT7_SINGLE_QUADS__GFX09_10 = 0x00000049, - SX_PERF_SEL_DB0_MRT7_DOUBLE_QUADS__GFX09_10 = 0x0000004a, - SX_PERF_SEL_DB1_A2M_DISCARD_QUADS__GFX09_10 = 0x0000004b, - SX_PERF_SEL_DB1_MRT0_BLEND_BYPASS__GFX09_10 = 0x0000004c, - SX_PERF_SEL_DB1_MRT0_DONT_RD_DEST__GFX09_10 = 0x0000004d, - SX_PERF_SEL_DB1_MRT0_DISCARD_SRC__GFX09_10 = 0x0000004e, - SX_PERF_SEL_DB1_MRT0_SINGLE_QUADS__GFX09_10 = 0x0000004f, - SX_PERF_SEL_DB1_MRT0_DOUBLE_QUADS__GFX09_10 = 0x00000050, - SX_PERF_SEL_DB1_MRT1_BLEND_BYPASS__GFX09_10 = 0x00000051, - SX_PERF_SEL_DB1_MRT1_DONT_RD_DEST__GFX09_10 = 0x00000052, - SX_PERF_SEL_DB1_MRT1_DISCARD_SRC__GFX09_10 = 0x00000053, - SX_PERF_SEL_DB1_MRT1_SINGLE_QUADS__GFX09_10 = 0x00000054, - SX_PERF_SEL_DB1_MRT1_DOUBLE_QUADS__GFX09_10 = 0x00000055, - SX_PERF_SEL_DB1_MRT2_BLEND_BYPASS__GFX09_10 = 0x00000056, - SX_PERF_SEL_DB1_MRT2_DONT_RD_DEST__GFX09_10 = 0x00000057, - SX_PERF_SEL_DB1_MRT2_DISCARD_SRC__GFX09_10 = 0x00000058, - SX_PERF_SEL_DB1_MRT2_SINGLE_QUADS__GFX09_10 = 0x00000059, - SX_PERF_SEL_DB1_MRT2_DOUBLE_QUADS__GFX09_10 = 0x0000005a, - SX_PERF_SEL_DB1_MRT3_BLEND_BYPASS__GFX09_10 = 0x0000005b, - SX_PERF_SEL_DB1_MRT3_DONT_RD_DEST__GFX09_10 = 0x0000005c, - SX_PERF_SEL_DB1_MRT3_DISCARD_SRC__GFX09_10 = 0x0000005d, - SX_PERF_SEL_DB1_MRT3_SINGLE_QUADS__GFX09_10 = 0x0000005e, - SX_PERF_SEL_DB1_MRT3_DOUBLE_QUADS__GFX09_10 = 0x0000005f, - SX_PERF_SEL_DB1_MRT4_BLEND_BYPASS__GFX09_10 = 0x00000060, - SX_PERF_SEL_DB1_MRT4_DONT_RD_DEST__GFX09_10 = 0x00000061, - SX_PERF_SEL_DB1_MRT4_DISCARD_SRC__GFX09_10 = 0x00000062, - SX_PERF_SEL_DB1_MRT4_SINGLE_QUADS__GFX09_10 = 0x00000063, - SX_PERF_SEL_DB1_MRT4_DOUBLE_QUADS__GFX09_10 = 0x00000064, - SX_PERF_SEL_DB1_MRT5_BLEND_BYPASS__GFX09_10 = 0x00000065, - SX_PERF_SEL_DB1_MRT5_DONT_RD_DEST__GFX09_10 = 0x00000066, - SX_PERF_SEL_DB1_MRT5_DISCARD_SRC__GFX09_10 = 0x00000067, - SX_PERF_SEL_DB1_MRT5_SINGLE_QUADS__GFX09_10 = 0x00000068, - SX_PERF_SEL_DB1_MRT5_DOUBLE_QUADS__GFX09_10 = 0x00000069, - 
SX_PERF_SEL_DB1_MRT6_BLEND_BYPASS__GFX09_10 = 0x0000006a, - SX_PERF_SEL_DB1_MRT6_DONT_RD_DEST__GFX09_10 = 0x0000006b, - SX_PERF_SEL_DB1_MRT6_DISCARD_SRC__GFX09_10 = 0x0000006c, - SX_PERF_SEL_DB1_MRT6_SINGLE_QUADS__GFX09_10 = 0x0000006d, - SX_PERF_SEL_DB1_MRT6_DOUBLE_QUADS__GFX09_10 = 0x0000006e, - SX_PERF_SEL_DB1_MRT7_BLEND_BYPASS__GFX09_10 = 0x0000006f, - SX_PERF_SEL_DB1_MRT7_DONT_RD_DEST__GFX09_10 = 0x00000070, - SX_PERF_SEL_DB1_MRT7_DISCARD_SRC__GFX09_10 = 0x00000071, - SX_PERF_SEL_DB1_MRT7_SINGLE_QUADS__GFX09_10 = 0x00000072, - SX_PERF_SEL_DB1_MRT7_DOUBLE_QUADS__GFX09_10 = 0x00000073, - SX_PERF_SEL_DB2_A2M_DISCARD_QUADS__GFX09_10 = 0x00000074, - SX_PERF_SEL_DB2_MRT0_BLEND_BYPASS__GFX09_10 = 0x00000075, - SX_PERF_SEL_DB2_MRT0_DONT_RD_DEST__GFX09_10 = 0x00000076, - SX_PERF_SEL_DB2_MRT0_DISCARD_SRC__GFX09_10 = 0x00000077, - SX_PERF_SEL_DB2_MRT0_SINGLE_QUADS__GFX09_10 = 0x00000078, - SX_PERF_SEL_DB2_MRT0_DOUBLE_QUADS__GFX09_10 = 0x00000079, - SX_PERF_SEL_DB2_MRT1_BLEND_BYPASS__GFX09_10 = 0x0000007a, - SX_PERF_SEL_DB2_MRT1_DONT_RD_DEST__GFX09_10 = 0x0000007b, - SX_PERF_SEL_DB2_MRT1_DISCARD_SRC__GFX09_10 = 0x0000007c, - SX_PERF_SEL_DB2_MRT1_SINGLE_QUADS__GFX09_10 = 0x0000007d, - SX_PERF_SEL_DB2_MRT1_DOUBLE_QUADS__GFX09_10 = 0x0000007e, - SX_PERF_SEL_DB2_MRT2_BLEND_BYPASS__GFX09_10 = 0x0000007f, - SX_PERF_SEL_DB2_MRT2_DONT_RD_DEST__GFX09_10 = 0x00000080, - SX_PERF_SEL_DB2_MRT2_DISCARD_SRC__GFX09_10 = 0x00000081, - SX_PERF_SEL_DB2_MRT2_SINGLE_QUADS__GFX09_10 = 0x00000082, - SX_PERF_SEL_DB2_MRT2_DOUBLE_QUADS__GFX09_10 = 0x00000083, - SX_PERF_SEL_DB2_MRT3_BLEND_BYPASS__GFX09_10 = 0x00000084, - SX_PERF_SEL_DB2_MRT3_DONT_RD_DEST__GFX09_10 = 0x00000085, - SX_PERF_SEL_DB2_MRT3_DISCARD_SRC__GFX09_10 = 0x00000086, - SX_PERF_SEL_DB2_MRT3_SINGLE_QUADS__GFX09_10 = 0x00000087, - SX_PERF_SEL_DB2_MRT3_DOUBLE_QUADS__GFX09_10 = 0x00000088, - SX_PERF_SEL_DB2_MRT4_BLEND_BYPASS__GFX09_10 = 0x00000089, - SX_PERF_SEL_DB2_MRT4_DONT_RD_DEST__GFX09_10 = 0x0000008a, - 
SX_PERF_SEL_DB2_MRT4_DISCARD_SRC__GFX09_10 = 0x0000008b, - SX_PERF_SEL_DB2_MRT4_SINGLE_QUADS__GFX09_10 = 0x0000008c, - SX_PERF_SEL_DB2_MRT4_DOUBLE_QUADS__GFX09_10 = 0x0000008d, - SX_PERF_SEL_DB2_MRT5_BLEND_BYPASS__GFX09_10 = 0x0000008e, - SX_PERF_SEL_DB2_MRT5_DONT_RD_DEST__GFX09_10 = 0x0000008f, - SX_PERF_SEL_DB2_MRT5_DISCARD_SRC__GFX09_10 = 0x00000090, - SX_PERF_SEL_DB2_MRT5_SINGLE_QUADS__GFX09_10 = 0x00000091, - SX_PERF_SEL_DB2_MRT5_DOUBLE_QUADS__GFX09_10 = 0x00000092, - SX_PERF_SEL_DB2_MRT6_BLEND_BYPASS__GFX09_10 = 0x00000093, - SX_PERF_SEL_DB2_MRT6_DONT_RD_DEST__GFX09_10 = 0x00000094, - SX_PERF_SEL_DB2_MRT6_DISCARD_SRC__GFX09_10 = 0x00000095, - SX_PERF_SEL_DB2_MRT6_SINGLE_QUADS__GFX09_10 = 0x00000096, - SX_PERF_SEL_DB2_MRT6_DOUBLE_QUADS__GFX09_10 = 0x00000097, - SX_PERF_SEL_DB2_MRT7_BLEND_BYPASS__GFX09_10 = 0x00000098, - SX_PERF_SEL_DB2_MRT7_DONT_RD_DEST__GFX09_10 = 0x00000099, - SX_PERF_SEL_DB2_MRT7_DISCARD_SRC__GFX09_10 = 0x0000009a, - SX_PERF_SEL_DB2_MRT7_SINGLE_QUADS__GFX09_10 = 0x0000009b, - SX_PERF_SEL_DB2_MRT7_DOUBLE_QUADS__GFX09_10 = 0x0000009c, - SX_PERF_SEL_DB3_A2M_DISCARD_QUADS__GFX09_10 = 0x0000009d, - SX_PERF_SEL_DB3_MRT0_BLEND_BYPASS__GFX09_10 = 0x0000009e, - SX_PERF_SEL_DB3_MRT0_DONT_RD_DEST__GFX09_10 = 0x0000009f, - SX_PERF_SEL_DB3_MRT0_DISCARD_SRC__GFX09_10 = 0x000000a0, - SX_PERF_SEL_DB3_MRT0_SINGLE_QUADS__GFX09_10 = 0x000000a1, - SX_PERF_SEL_DB3_MRT0_DOUBLE_QUADS__GFX09_10 = 0x000000a2, - SX_PERF_SEL_DB3_MRT1_BLEND_BYPASS__GFX09_10 = 0x000000a3, - SX_PERF_SEL_DB3_MRT1_DONT_RD_DEST__GFX09_10 = 0x000000a4, - SX_PERF_SEL_DB3_MRT1_DISCARD_SRC__GFX09_10 = 0x000000a5, - SX_PERF_SEL_DB3_MRT1_SINGLE_QUADS__GFX09_10 = 0x000000a6, - SX_PERF_SEL_DB3_MRT1_DOUBLE_QUADS__GFX09_10 = 0x000000a7, - SX_PERF_SEL_DB3_MRT2_BLEND_BYPASS__GFX09_10 = 0x000000a8, - SX_PERF_SEL_DB3_MRT2_DONT_RD_DEST__GFX09_10 = 0x000000a9, - SX_PERF_SEL_DB3_MRT2_DISCARD_SRC__GFX09_10 = 0x000000aa, - SX_PERF_SEL_DB3_MRT2_SINGLE_QUADS__GFX09_10 = 0x000000ab, - 
SX_PERF_SEL_DB3_MRT2_DOUBLE_QUADS__GFX09_10 = 0x000000ac, - SX_PERF_SEL_DB3_MRT3_BLEND_BYPASS__GFX09_10 = 0x000000ad, - SX_PERF_SEL_DB3_MRT3_DONT_RD_DEST__GFX09_10 = 0x000000ae, - SX_PERF_SEL_DB3_MRT3_DISCARD_SRC__GFX09_10 = 0x000000af, - SX_PERF_SEL_DB3_MRT3_SINGLE_QUADS__GFX09_10 = 0x000000b0, - SX_PERF_SEL_DB3_MRT3_DOUBLE_QUADS__GFX09_10 = 0x000000b1, - SX_PERF_SEL_DB3_MRT4_BLEND_BYPASS__GFX09_10 = 0x000000b2, - SX_PERF_SEL_DB3_MRT4_DONT_RD_DEST__GFX09_10 = 0x000000b3, - SX_PERF_SEL_DB3_MRT4_DISCARD_SRC__GFX09_10 = 0x000000b4, - SX_PERF_SEL_DB3_MRT4_SINGLE_QUADS__GFX09_10 = 0x000000b5, - SX_PERF_SEL_DB3_MRT4_DOUBLE_QUADS__GFX09_10 = 0x000000b6, - SX_PERF_SEL_DB3_MRT5_BLEND_BYPASS__GFX09_10 = 0x000000b7, - SX_PERF_SEL_DB3_MRT5_DONT_RD_DEST__GFX09_10 = 0x000000b8, - SX_PERF_SEL_DB3_MRT5_DISCARD_SRC__GFX09_10 = 0x000000b9, - SX_PERF_SEL_DB3_MRT5_SINGLE_QUADS__GFX09_10 = 0x000000ba, - SX_PERF_SEL_DB3_MRT5_DOUBLE_QUADS__GFX09_10 = 0x000000bb, - SX_PERF_SEL_DB3_MRT6_BLEND_BYPASS__GFX09_10 = 0x000000bc, - SX_PERF_SEL_DB3_MRT6_DONT_RD_DEST__GFX09_10 = 0x000000bd, - SX_PERF_SEL_DB3_MRT6_DISCARD_SRC__GFX09_10 = 0x000000be, - SX_PERF_SEL_DB3_MRT6_SINGLE_QUADS__GFX09_10 = 0x000000bf, - SX_PERF_SEL_DB3_MRT6_DOUBLE_QUADS__GFX09_10 = 0x000000c0, - SX_PERF_SEL_DB3_MRT7_BLEND_BYPASS__GFX09_10 = 0x000000c1, - SX_PERF_SEL_DB3_MRT7_DONT_RD_DEST__GFX09_10 = 0x000000c2, - SX_PERF_SEL_DB3_MRT7_DISCARD_SRC__GFX09_10 = 0x000000c3, - SX_PERF_SEL_DB3_MRT7_SINGLE_QUADS__GFX09_10 = 0x000000c4, - SX_PERF_SEL_DB3_MRT7_DOUBLE_QUADS__GFX09_10 = 0x000000c5, - SX_PERF_SEL_PA_REQ_LATENCY__GFX09_10 = 0x000000c6, - SX_PERF_SEL_POS_SCBD_STALL__GFX09_10 = 0x000000c7, - SX_PERF_SEL_COL_SCBD_STALL__GFX09_10 = 0x000000c8, - SX_PERF_SEL_CLOCK_DROP_STALL__GFX09_10 = 0x000000c9, - SX_PERF_SEL_GATE_EN5__GFX09_10 = 0x000000ca, - SX_PERF_SEL_GATE_EN6__GFX09_10 = 0x000000cb, - SX_PERF_SEL_DB0_SIZE__GFX09_10 = 0x000000cc, - SX_PERF_SEL_DB1_SIZE__GFX09_10 = 0x000000cd, - SX_PERF_SEL_DB2_SIZE__GFX09_10 = 
0x000000ce, - SX_PERF_SEL_DB3_SIZE__GFX09_10 = 0x000000cf, - SX_PERF_SEL_SPLITMODE__GFX10 = 0x000000d0, - SX_PERF_SEL_COL_SCBD0_STALL__GFX10 = 0x000000d1, - SX_PERF_SEL_COL_SCBD1_STALL__GFX10 = 0x000000d2, - SX_PERF_SEL_IDX_STALL_CYCLES__GFX10 = 0x000000d3, - SX_PERF_SEL_IDX_IDLE_CYCLES__GFX10 = 0x000000d4, - SX_PERF_SEL_IDX_REQ__GFX10 = 0x000000d5, - SX_PERF_SEL_IDX_RET__GFX10 = 0x000000d6, - SX_PERF_SEL_IDX_REQ_LATENCY__GFX10 = 0x000000d7, - SX_PERF_SEL_IDX_SCBD_STALL__GFX10 = 0x000000d8, - SX_PERF_SEL_GATE_EN7__GFX10 = 0x000000d9, - SX_PERF_SEL_GATE_EN8__GFX10 = 0x000000da, - SX_PERF_SEL_SH_IDX_STARVE__GFX10 = 0x000000db, - SX_PERF_SEL_IDX_BUSY__GFX10 = 0x000000dc, - SX_PERF_SEL_RB0_STALL_DUE_TO_ORDERING__GFX10 = 0x000000dd, - SX_PERF_SEL_RB1_STALL_DUE_TO_ORDERING__GFX10 = 0x000000de, - SX_PERF_SEL_RB2_STALL_DUE_TO_ORDERING__GFX10 = 0x000000df, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10 = 0x000000e0, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SX_PERF_SEL_DB0_MRT_BLEND_BYPASS__GFX11 = 0x00000022, - SX_PERF_SEL_DB0_MRT_DONT_RD_DEST__GFX11 = 0x00000023, - SX_PERF_SEL_DB0_MRT_DISCARD_SRC__GFX11 = 0x00000024, - SX_PERF_SEL_DB0_MRT_SINGLE_QUADS__GFX11 = 0x00000025, - SX_PERF_SEL_DB0_MRT_DOUBLE_QUADS__GFX11 = 0x00000026, - SX_PERF_SEL_DB1_MRT_BLEND_BYPASS__GFX11 = 0x00000027, - SX_PERF_SEL_DB1_MRT_DONT_RD_DEST__GFX11 = 0x00000028, - SX_PERF_SEL_DB1_MRT_DISCARD_SRC__GFX11 = 0x00000029, - SX_PERF_SEL_DB1_MRT_SINGLE_QUADS__GFX11 = 0x0000002a, - SX_PERF_SEL_DB1_MRT_DOUBLE_QUADS__GFX11 = 0x0000002b, - SX_PERF_SEL_DB2_MRT_BLEND_BYPASS__GFX11 = 0x0000002c, - SX_PERF_SEL_DB2_MRT_DONT_RD_DEST__GFX11 = 0x0000002d, - SX_PERF_SEL_DB2_MRT_DISCARD_SRC__GFX11 = 0x0000002e, - SX_PERF_SEL_DB2_MRT_SINGLE_QUADS__GFX11 = 0x0000002f, - SX_PERF_SEL_DB2_MRT_DOUBLE_QUADS__GFX11 = 0x00000030, - SX_PERF_SEL_DB3_MRT_BLEND_BYPASS__GFX11 = 0x00000031, - SX_PERF_SEL_DB3_MRT_DONT_RD_DEST__GFX11 = 0x00000032, - SX_PERF_SEL_DB3_MRT_DISCARD_SRC__GFX11 = 
0x00000033, - SX_PERF_SEL_DB3_MRT_SINGLE_QUADS__GFX11 = 0x00000034, - SX_PERF_SEL_DB3_MRT_DOUBLE_QUADS__GFX11 = 0x00000035, - SX_PERF_SEL_PA_REQ_LATENCY__GFX11 = 0x00000036, - SX_PERF_SEL_POS_SCBD_STALL__GFX11 = 0x00000037, - SX_PERF_SEL_CLOCK_DROP_STALL__GFX11 = 0x00000038, - SX_PERF_SEL_GATE_EN5__GFX11 = 0x00000039, - SX_PERF_SEL_GATE_EN6__GFX11 = 0x0000003a, - SX_PERF_SEL_DB0_SIZE__GFX11 = 0x0000003b, - SX_PERF_SEL_DB1_SIZE__GFX11 = 0x0000003c, - SX_PERF_SEL_DB2_SIZE__GFX11 = 0x0000003d, - SX_PERF_SEL_DB3_SIZE__GFX11 = 0x0000003e, - SX_PERF_SEL_IDX_STALL_CYCLES__GFX11 = 0x0000003f, - SX_PERF_SEL_IDX_IDLE_CYCLES__GFX11 = 0x00000040, - SX_PERF_SEL_IDX_REQ__GFX11 = 0x00000041, - SX_PERF_SEL_IDX_RET__GFX11 = 0x00000042, - SX_PERF_SEL_IDX_REQ_LATENCY__GFX11 = 0x00000043, - SX_PERF_SEL_IDX_SCBD_STALL__GFX11 = 0x00000044, - SX_PERF_SEL_GATE_EN7__GFX11 = 0x00000045, - SX_PERF_SEL_GATE_EN8__GFX11 = 0x00000046, - SX_PERF_SEL_SH_IDX_STARVE__GFX11 = 0x00000047, - SX_PERF_SEL_IDX_BUSY__GFX11 = 0x00000048, - SX_PERF_SEL_PA_POS_BANK_CONF__GFX11 = 0x00000049, - SX_PERF_SEL_DB0_END_OF_WAVE__GFX11 = 0x0000004a, - SX_PERF_SEL_DB0_4X2_DISCARD__GFX11 = 0x0000004b, - SX_PERF_SEL_DB1_END_OF_WAVE__GFX11 = 0x0000004c, - SX_PERF_SEL_DB1_4X2_DISCARD__GFX11 = 0x0000004d, - SX_PERF_SEL_DB2_END_OF_WAVE__GFX11 = 0x0000004e, - SX_PERF_SEL_DB2_4X2_DISCARD__GFX11 = 0x0000004f, - SX_PERF_SEL_DB3_END_OF_WAVE__GFX11 = 0x00000050, - SX_PERF_SEL_DB3_4X2_DISCARD__GFX11 = 0x00000051, -#endif -} SX_PERFCOUNTER_VALS; - -constexpr unsigned int MaxSxPerfcounterValsGfx09 = SX_PERF_SEL_DB3_SIZE__GFX09_10; -constexpr unsigned int MaxSxPerfcounterValsGfx10Core = SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxSxPerfcounterValsGfx11 = SX_PERF_SEL_DB3_4X2_DISCARD__GFX11; -#endif - -typedef enum TA_PERFCOUNT_SEL { - TA_PERF_SEL_NULL = 0x00000000, - TA_PERF_SEL_gradient_busy = 0x00000007, - 
TA_PERF_SEL_gradient_fifo_busy = 0x00000008, - TA_PERF_SEL_lod_busy = 0x00000009, - TA_PERF_SEL_lod_fifo_busy = 0x0000000a, - TA_PERF_SEL_addresser_busy = 0x0000000b, - TA_PERF_SEL_addresser_fifo_busy = 0x0000000c, - TA_PERF_SEL_aligner_busy = 0x0000000d, - TA_PERF_SEL_write_path_busy = 0x0000000e, - TA_PERF_SEL_ta_busy = 0x0000000f, - TA_PERF_SEL_total_wavefronts = 0x00000020, - TA_PERF_SEL_gradient_cycles = 0x00000021, - TA_PERF_SEL_walker_cycles = 0x00000022, - TA_PERF_SEL_aligner_cycles = 0x00000023, - TA_PERF_SEL_image_wavefronts = 0x00000024, - TA_PERF_SEL_image_read_wavefronts = 0x00000025, - TA_PERF_SEL_image_atomic_wavefronts = 0x00000027, - TA_PERF_SEL_buffer_wavefronts = 0x0000002c, - TA_PERF_SEL_buffer_atomic_wavefronts = 0x0000002f, - TA_PERF_SEL_buffer_total_cycles = 0x00000031, - TA_PERF_SEL_addr_stalled_by_tc_cycles = 0x00000036, - TA_PERF_SEL_addr_stalled_by_td_cycles = 0x00000037, - TA_PERF_SEL_addresser_stalled_by_aligner_only_cycles = 0x00000039, - TA_PERF_SEL_addresser_stalled_cycles = 0x0000003a, - TA_PERF_SEL_aniso_stalled_by_addresser_only_cycles = 0x0000003b, - TA_PERF_SEL_aniso_stalled_cycles = 0x0000003c, - TA_PERF_SEL_deriv_stalled_by_aniso_only_cycles = 0x0000003d, - TA_PERF_SEL_deriv_stalled_cycles = 0x0000003e, - TA_PERF_SEL_aniso_gt1_cycle_quads = 0x0000003f, - TA_PERF_SEL_mipmap_lod_0_samples = 0x00000049, - TA_PERF_SEL_mipmap_lod_1_samples = 0x0000004a, - TA_PERF_SEL_mipmap_lod_2_samples = 0x0000004b, - TA_PERF_SEL_mipmap_lod_3_samples = 0x0000004c, - TA_PERF_SEL_mipmap_lod_4_samples = 0x0000004d, - TA_PERF_SEL_mipmap_lod_5_samples = 0x0000004e, - TA_PERF_SEL_mipmap_lod_6_samples = 0x0000004f, - TA_PERF_SEL_mipmap_lod_7_samples = 0x00000050, - TA_PERF_SEL_mipmap_lod_8_samples = 0x00000051, - TA_PERF_SEL_mipmap_lod_9_samples = 0x00000052, - TA_PERF_SEL_mipmap_lod_10_samples = 0x00000053, - TA_PERF_SEL_mipmap_lod_11_samples = 0x00000054, - TA_PERF_SEL_mipmap_lod_12_samples = 0x00000055, - TA_PERF_SEL_mipmap_lod_13_samples = 
0x00000056, - TA_PERF_SEL_mipmap_lod_14_samples = 0x00000057, - TA_PERF_SEL_mipmap_invalid_samples = 0x00000058, - TA_PERF_SEL_aniso_1_cycle_quads = 0x00000059, - TA_PERF_SEL_aniso_2_cycle_quads = 0x0000005a, - TA_PERF_SEL_aniso_4_cycle_quads = 0x0000005b, - TA_PERF_SEL_aniso_6_cycle_quads = 0x0000005c, - TA_PERF_SEL_aniso_8_cycle_quads = 0x0000005d, - TA_PERF_SEL_aniso_10_cycle_quads = 0x0000005e, - TA_PERF_SEL_aniso_12_cycle_quads = 0x0000005f, - TA_PERF_SEL_aniso_14_cycle_quads = 0x00000060, - TA_PERF_SEL_aniso_16_cycle_quads = 0x00000061, - TA_PERF_SEL_flat_wavefronts = 0x00000064, - TA_PERF_SEL_flat_atomic_wavefronts = 0x00000067, - TA_PERF_SEL_sh_fifo_busy__GFX09 = 0x00000001, - TA_PERF_SEL_sh_fifo_cmd_busy__GFX09 = 0x00000002, - TA_PERF_SEL_sh_fifo_addr_busy__GFX09 = 0x00000003, - TA_PERF_SEL_sh_fifo_data_busy__GFX09 = 0x00000004, - TA_PERF_SEL_sh_fifo_data_sfifo_busy__GFX09 = 0x00000005, - TA_PERF_SEL_sh_fifo_data_tfifo_busy__GFX09 = 0x00000006, - TA_PERF_SEL_sq_ta_cmd_cycles__GFX09 = 0x00000010, - TA_PERF_SEL_sp_ta_addr_cycles__GFX09 = 0x00000011, - TA_PERF_SEL_sp_ta_data_cycles__GFX09 = 0x00000012, - TA_PERF_SEL_ta_fa_data_state_cycles__GFX09 = 0x00000013, - TA_PERF_SEL_sh_fifo_addr_waiting_on_cmd_cycles__GFX09 = 0x00000014, - TA_PERF_SEL_sh_fifo_cmd_waiting_on_addr_cycles__GFX09 = 0x00000015, - TA_PERF_SEL_sh_fifo_addr_starved_while_busy_cycles__GFX09 = 0x00000016, - TA_PERF_SEL_sh_fifo_cmd_starved_while_busy_cycles__GFX09 = 0x00000017, - TA_PERF_SEL_sh_fifo_data_waiting_on_data_state_cycles__GFX09 = 0x00000018, - TA_PERF_SEL_sh_fifo_data_state_waiting_on_data_cycles__GFX09 = 0x00000019, - TA_PERF_SEL_sh_fifo_data_starved_while_busy_cycles__GFX09 = 0x0000001a, - TA_PERF_SEL_sh_fifo_data_state_starved_while_busy_cycles__GFX09 = 0x0000001b, - TA_PERF_SEL_ta_sh_fifo_starved__GFX09 = 0x0000001c, - TA_PERF_SEL_RESERVED_29__GFX09 = 0x0000001d, - TA_PERF_SEL_sh_fifo_addr_cycles__GFX09 = 0x0000001e, - TA_PERF_SEL_sh_fifo_data_cycles__GFX09 = 0x0000001f, - 
TA_PERF_SEL_image_write_wavefronts__GFX09 = 0x00000026, - TA_PERF_SEL_image_total_cycles__GFX09 = 0x00000028, - TA_PERF_SEL_RESERVED_41__GFX09 = 0x00000029, - TA_PERF_SEL_RESERVED_42__GFX09 = 0x0000002a, - TA_PERF_SEL_RESERVED_43__GFX09 = 0x0000002b, - TA_PERF_SEL_buffer_read_wavefronts__GFX09 = 0x0000002d, - TA_PERF_SEL_buffer_write_wavefronts__GFX09 = 0x0000002e, - TA_PERF_SEL_buffer_coalescable_wavefronts__GFX09 = 0x00000030, - TA_PERF_SEL_buffer_coalescable_addr_multicycled_cycles__GFX09 = 0x00000032, - TA_PERF_SEL_buffer_coalescable_clamp_16kdword_multicycled_cycles__GFX09 = 0x00000033, - TA_PERF_SEL_buffer_coalesced_read_cycles__GFX09 = 0x00000034, - TA_PERF_SEL_buffer_coalesced_write_cycles__GFX09 = 0x00000035, - TA_PERF_SEL_data_stalled_by_tc_cycles__GFX09 = 0x00000038, - TA_PERF_SEL_color_1_cycle_pixels__GFX09 = 0x00000040, - TA_PERF_SEL_color_2_cycle_pixels__GFX09 = 0x00000041, - TA_PERF_SEL_color_3_cycle_pixels__GFX09 = 0x00000042, - TA_PERF_SEL_color_4_cycle_pixels__GFX09 = 0x00000043, - TA_PERF_SEL_mip_1_cycle_pixels__GFX09 = 0x00000044, - TA_PERF_SEL_mip_2_cycle_pixels__GFX09 = 0x00000045, - TA_PERF_SEL_vol_1_cycle_pixels__GFX09 = 0x00000046, - TA_PERF_SEL_vol_2_cycle_pixels__GFX09 = 0x00000047, - TA_PERF_SEL_bilin_point_1_cycle_pixels__GFX09 = 0x00000048, - TA_PERF_SEL_write_path_input_cycles__GFX09 = 0x00000062, - TA_PERF_SEL_write_path_output_cycles__GFX09 = 0x00000063, - TA_PERF_SEL_flat_read_wavefronts__GFX09 = 0x00000065, - TA_PERF_SEL_flat_write_wavefronts__GFX09 = 0x00000066, - TA_PERF_SEL_flat_coalesceable_wavefronts__GFX09 = 0x00000068, - TA_PERF_SEL_reg_sclk_vld__GFX09 = 0x00000069, - TA_PERF_SEL_local_cg_dyn_sclk_grp0_en__GFX09 = 0x0000006a, - TA_PERF_SEL_local_cg_dyn_sclk_grp1_en__GFX09 = 0x0000006b, - TA_PERF_SEL_local_cg_dyn_sclk_grp1_mems_en__GFX09 = 0x0000006c, - TA_PERF_SEL_local_cg_dyn_sclk_grp4_en__GFX09 = 0x0000006d, - TA_PERF_SEL_local_cg_dyn_sclk_grp5_en__GFX09 = 0x0000006e, - TA_PERF_SEL_xnack_on_phase0__GFX09 = 0x0000006f, - 
TA_PERF_SEL_xnack_on_phase1__GFX09 = 0x00000070, - TA_PERF_SEL_xnack_on_phase2__GFX09 = 0x00000071, - TA_PERF_SEL_xnack_on_phase3__GFX09 = 0x00000072, - TA_PERF_SEL_first_xnack_on_phase0__GFX09 = 0x00000073, - TA_PERF_SEL_first_xnack_on_phase1__GFX09 = 0x00000074, - TA_PERF_SEL_first_xnack_on_phase2__GFX09 = 0x00000075, - TA_PERF_SEL_first_xnack_on_phase3__GFX09 = 0x00000076, - TA_PERF_SEL_RESERVED_31__GFX10 = 0x0000001f, - TA_PERF_SEL_RESERVED_48__GFX10 = 0x00000030, - TA_PERF_SEL_RESERVED_67__GFX10 = 0x00000043, - TA_PERF_SEL_RESERVED_112__GFX10 = 0x00000070, - TA_PERF_SEL_RESERVED_113__GFX10 = 0x00000071, - TA_PERF_SEL_RESERVED_138__GFX10 = 0x0000008a, - TA_PERF_SEL_RESERVED_139__GFX10 = 0x0000008b, - TA_PERF_SEL_RESERVED_140__GFX10 = 0x0000008c, - TA_PERF_SEL_RESERVED_141__GFX10 = 0x0000008d, - TA_PERF_SEL_RESERVED_142__GFX10 = 0x0000008e, - TA_PERF_SEL_RESERVED_143__GFX10 = 0x0000008f, - TA_PERF_SEL_RESERVED_149__GFX10 = 0x00000095, - TA_PERF_SEL_RESERVED_153__GFX10 = 0x00000099, - TA_PERF_SEL_RESERVED_166__GFX10 = 0x000000a6, - TA_PERF_SEL_RESERVED_167__GFX10 = 0x000000a7, - TA_PERF_SEL_RESERVED_168__GFX10 = 0x000000a8, - TA_PERF_SEL_RESERVED_169__GFX10 = 0x000000a9, - TA_PERF_SEL_RESERVED_222__GFX10 = 0x000000de, - TA_PERF_SEL_RESERVED_223__GFX10 = 0x000000df, - TA_PERF_SEL_RESERVED_1__GFX101 = 0x00000001, - TA_PERF_SEL_RESERVED_2__GFX101 = 0x00000002, - TA_PERF_SEL_RESERVED_3__GFX101 = 0x00000003, - TA_PERF_SEL_RESERVED_4__GFX101 = 0x00000004, - TA_PERF_SEL_RESERVED_5__GFX101 = 0x00000005, - TA_PERF_SEL_RESERVED_6__GFX101 = 0x00000006, - TA_PERF_SEL_RESERVED_16__GFX101 = 0x00000010, - TA_PERF_SEL_RESERVED_17__GFX101 = 0x00000011, - TA_PERF_SEL_RESERVED_18__GFX101 = 0x00000012, - TA_PERF_SEL_RESERVED_19__GFX101 = 0x00000013, - TA_PERF_SEL_RESERVED_20__GFX101 = 0x00000014, - TA_PERF_SEL_RESERVED_21__GFX101 = 0x00000015, - TA_PERF_SEL_RESERVED_22__GFX101 = 0x00000016, - TA_PERF_SEL_RESERVED_23__GFX101 = 0x00000017, - TA_PERF_SEL_RESERVED_24__GFX101 = 
0x00000018, - TA_PERF_SEL_RESERVED_25__GFX101 = 0x00000019, - TA_PERF_SEL_RESERVED_26__GFX101 = 0x0000001a, - TA_PERF_SEL_RESERVED_27__GFX101 = 0x0000001b, - TA_PERF_SEL_RESERVED_28__GFX101 = 0x0000001c, - TA_PERF_SEL_RESERVED_29__GFX101 = 0x0000001d, - TA_PERF_SEL_RESERVED_30__GFX101 = 0x0000001e, - TA_PERF_SEL_image_write_wavefronts__GFX101 = 0x00000026, - TA_PERF_SEL_RESERVED_43__GFX101 = 0x0000002b, - TA_PERF_SEL_buffer_read_wavefronts__GFX101 = 0x0000002d, - TA_PERF_SEL_buffer_write_wavefronts__GFX101 = 0x0000002e, - TA_PERF_SEL_RESERVED_50__GFX101 = 0x00000032, - TA_PERF_SEL_RESERVED_51__GFX101 = 0x00000033, - TA_PERF_SEL_RESERVED_52__GFX101 = 0x00000034, - TA_PERF_SEL_RESERVED_53__GFX101 = 0x00000035, - TA_PERF_SEL_RESERVED_56__GFX101 = 0x00000038, - TA_PERF_SEL_color_1_cycle_pixels__GFX101 = 0x00000040, - TA_PERF_SEL_color_2_cycle_pixels__GFX101 = 0x00000041, - TA_PERF_SEL_color_3_cycle_pixels__GFX101 = 0x00000042, - TA_PERF_SEL_mip_1_cycle_pixels__GFX101 = 0x00000044, - TA_PERF_SEL_mip_2_cycle_pixels__GFX101 = 0x00000045, - TA_PERF_SEL_vol_1_cycle_pixels__GFX101 = 0x00000046, - TA_PERF_SEL_vol_2_cycle_pixels__GFX101 = 0x00000047, - TA_PERF_SEL_bilin_point_1_cycle_pixels__GFX101 = 0x00000048, - TA_PERF_SEL_write_path_input_cycles__GFX101 = 0x00000062, - TA_PERF_SEL_write_path_output_cycles__GFX101 = 0x00000063, - TA_PERF_SEL_flat_read_wavefronts__GFX101 = 0x00000065, - TA_PERF_SEL_flat_write_wavefronts__GFX101 = 0x00000066, - TA_PERF_SEL_RESERVED_104__GFX101 = 0x00000068, - TA_PERF_SEL_reg_sclk_vld__GFX101 = 0x00000069, - TA_PERF_SEL_local_cg_dyn_sclk_grp0_en__GFX101 = 0x0000006a, - TA_PERF_SEL_local_cg_dyn_sclk_grp1_en__GFX101 = 0x0000006b, - TA_PERF_SEL_local_cg_dyn_sclk_grp1_mems_en__GFX101 = 0x0000006c, - TA_PERF_SEL_local_cg_dyn_sclk_grp4_en__GFX101 = 0x0000006d, - TA_PERF_SEL_local_cg_dyn_sclk_grp5_en__GFX101 = 0x0000006e, - TA_PERF_SEL_xnack__GFX101 = 0x0000006f, - TA_PERF_SEL_RESERVED_114__GFX101 = 0x00000072, - TA_PERF_SEL_first_xnack__GFX101 = 
0x00000073, - TA_PERF_SEL_RESERVED_116__GFX101 = 0x00000074, - TA_PERF_SEL_RESERVED_117__GFX101 = 0x00000075, - TA_PERF_SEL_RESERVED_118__GFX101 = 0x00000076, - TA_PERF_SEL_RESERVED_119__GFX101 = 0x00000077, - TA_PERF_SEL_RESERVED_120__GFX101 = 0x00000078, - TA_PERF_SEL_RESERVED_121__GFX101 = 0x00000079, - TA_PERF_SEL_RESERVED_122__GFX101 = 0x0000007a, - TA_PERF_SEL_RESERVED_123__GFX101 = 0x0000007b, - TA_PERF_SEL_RESERVED_124__GFX101 = 0x0000007c, - TA_PERF_SEL_RESERVED_125__GFX101 = 0x0000007d, - TA_PERF_SEL_RESERVED_126__GFX101 = 0x0000007e, - TA_PERF_SEL_RESERVED_127__GFX101 = 0x0000007f, - TA_PERF_SEL_in_waiting_on_cmd_cycles__GFX101 = 0x00000093, - TA_PERF_SEL_in_waiting_on_any_cycles__GFX101 = 0x00000094, - TA_PERF_SEL_addr_stalled_by_xnack_cycles__GFX101 = 0x00000098, - TA_PERF_SEL_RESERVED_154__GFX101 = 0x0000009a, - TA_PERF_SEL_RESERVED_155__GFX101 = 0x0000009b, - TA_PERF_SEL_RESERVED_156__GFX101 = 0x0000009c, - TA_PERF_SEL_RESERVED_157__GFX101 = 0x0000009d, - TA_PERF_SEL_RESERVED_158__GFX101 = 0x0000009e, - TA_PERF_SEL_RESERVED_159__GFX101 = 0x0000009f, - TA_PERF_SEL_RESERVED_162__GFX101 = 0x000000a2, - TA_PERF_SEL_RESERVED_163__GFX101 = 0x000000a3, - TA_PERF_SEL_RESERVED_164__GFX101 = 0x000000a4, - TA_PERF_SEL_RESERVED_165__GFX101 = 0x000000a5, - TA_PERF_SEL_RESERVED_173__GFX101 = 0x000000ad, - TA_PERF_SEL_RESERVED_174__GFX101 = 0x000000ae, - TA_PERF_SEL_RESERVED_175__GFX101 = 0x000000af, - TA_PERF_SEL_RESERVED_176__GFX101 = 0x000000b0, - TA_PERF_SEL_RESERVED_177__GFX101 = 0x000000b1, - TA_PERF_SEL_RESERVED_178__GFX101 = 0x000000b2, - TA_PERF_SEL_RESERVED_179__GFX101 = 0x000000b3, - TA_PERF_SEL_RESERVED_180__GFX101 = 0x000000b4, - TA_PERF_SEL_RESERVED_181__GFX101 = 0x000000b5, - TA_PERF_SEL_RESERVED_182__GFX101 = 0x000000b6, - TA_PERF_SEL_RESERVED_183__GFX101 = 0x000000b7, - TA_PERF_SEL_RESERVED_184__GFX101 = 0x000000b8, - TA_PERF_SEL_RESERVED_185__GFX101 = 0x000000b9, - TA_PERF_SEL_RESERVED_186__GFX101 = 0x000000ba, - TA_PERF_SEL_RESERVED_187__GFX101 = 
0x000000bb, - TA_PERF_SEL_RESERVED_188__GFX101 = 0x000000bc, - TA_PERF_SEL_RESERVED_189__GFX101 = 0x000000bd, - TA_PERF_SEL_RESERVED_190__GFX101 = 0x000000be, - TA_PERF_SEL_RESERVED_191__GFX101 = 0x000000bf, - TA_PERF_SEL_RESERVED_192__GFX101 = 0x000000c0, - TA_PERF_SEL_RESERVED_193__GFX101 = 0x000000c1, - TA_PERF_SEL_RESERVED_194__GFX101 = 0x000000c2, - TA_PERF_SEL_RESERVED_195__GFX101 = 0x000000c3, - TA_PERF_SEL_RESERVED_196__GFX101 = 0x000000c4, - TA_PERF_SEL_RESERVED_197__GFX101 = 0x000000c5, - TA_PERF_SEL_RESERVED_198__GFX101 = 0x000000c6, - TA_PERF_SEL_RESERVED_199__GFX101 = 0x000000c7, - TA_PERF_SEL_RESERVED_200__GFX101 = 0x000000c8, - TA_PERF_SEL_RESERVED_201__GFX101 = 0x000000c9, - TA_PERF_SEL_RESERVED_202__GFX101 = 0x000000ca, - TA_PERF_SEL_RESERVED_203__GFX101 = 0x000000cb, - TA_PERF_SEL_RESERVED_204__GFX101 = 0x000000cc, - TA_PERF_SEL_RESERVED_205__GFX101 = 0x000000cd, - TA_PERF_SEL_RESERVED_206__GFX101 = 0x000000ce, - TA_PERF_SEL_RESERVED_207__GFX101 = 0x000000cf, - TA_PERF_SEL_RESERVED_208__GFX101 = 0x000000d0, - TA_PERF_SEL_RESERVED_209__GFX101 = 0x000000d1, - TA_PERF_SEL_RESERVED_210__GFX101 = 0x000000d2, - TA_PERF_SEL_RESERVED_211__GFX101 = 0x000000d3, - TA_PERF_SEL_RESERVED_212__GFX101 = 0x000000d4, - TA_PERF_SEL_RESERVED_213__GFX101 = 0x000000d5, - TA_PERF_SEL_RESERVED_214__GFX101 = 0x000000d6, - TA_PERF_SEL_RESERVED_215__GFX101 = 0x000000d7, - TA_PERF_SEL_RESERVED_216__GFX101 = 0x000000d8, - TA_PERF_SEL_RESERVED_217__GFX101 = 0x000000d9, - TA_PERF_SEL_RESERVED_218__GFX101 = 0x000000da, - TA_PERF_SEL_RESERVED_219__GFX101 = 0x000000db, - TA_PERF_SEL_RESERVED_220__GFX101 = 0x000000dc, - TA_PERF_SEL_RESERVED_221__GFX101 = 0x000000dd, - TA_PERF_SEL_sample_path_cycles__GFX101 = 0x000000e0, - TA_PERF_SEL_nosample_path_cycles__GFX101 = 0x000000e1, - TA_PERF_SEL_harvestable_register_clk_enabled_cycles__GFX103COREPLUS = 0x0000006c, - TA_PERF_SEL_RESERVED_111__GFX103DERIVATIVE = 0x0000006f, - TA_PERF_SEL_RESERVED_147__GFX103DERIVATIVE = 0x00000093, - 
TA_PERF_SEL_RESERVED_148__GFX103DERIVATIVE = 0x00000094, - TA_PERF_SEL_RESERVED_152__GFX103DERIVATIVE = 0x00000098, - TA_PERF_SEL_bvh_total_cycles__GFX103PLUS = 0x0000002b, - TA_PERF_SEL_num_nodes_invalidated_due_to_bad_input__GFX103PLUS = 0x000000ae, - TA_PERF_SEL_num_nodes_invalidated_due_to_oob__GFX103PLUS = 0x000000af, - TA_PERF_SEL_num_of_bvh_valid_first_tri__GFX103PLUS = 0x000000b0, - TA_PERF_SEL_num_of_bvh_valid_second_tri__GFX103PLUS = 0x000000b1, - TA_PERF_SEL_num_of_bvh_valid_third_tri__GFX103PLUS = 0x000000b2, - TA_PERF_SEL_num_of_bvh_valid_fourth_tri__GFX103PLUS = 0x000000b3, - TA_PERF_SEL_num_of_bvh_valid_fp16_box__GFX103PLUS = 0x000000b4, - TA_PERF_SEL_num_of_bvh_valid_fp32_box__GFX103PLUS = 0x000000b5, - TA_PERF_SEL_num_of_bvh_invalidated_first_tri__GFX103PLUS = 0x000000b6, - TA_PERF_SEL_num_of_bvh_invalidated_second_tri__GFX103PLUS = 0x000000b7, - TA_PERF_SEL_num_of_bvh_invalidated_third_tri__GFX103PLUS = 0x000000b8, - TA_PERF_SEL_num_of_bvh_invalidated_fourth_tri__GFX103PLUS = 0x000000b9, - TA_PERF_SEL_num_of_bvh_invalidated_fp16_box__GFX103PLUS = 0x000000ba, - TA_PERF_SEL_num_of_bvh_invalidated_fp32_box__GFX103PLUS = 0x000000bb, - TA_PERF_SEL_image_sampler_has_offset_instructions__GFX103PLUSEXCLUSIVE = 0x00000001, - TA_PERF_SEL_image_sampler_has_bias_instructions__GFX103PLUSEXCLUSIVE = 0x00000002, - TA_PERF_SEL_image_sampler_has_reference_instructions__GFX103PLUSEXCLUSIVE = 0x00000003, - TA_PERF_SEL_image_sampler_has_ds_instructions__GFX103PLUSEXCLUSIVE = 0x00000004, - TA_PERF_SEL_image_sampler_has_dt_instructions__GFX103PLUSEXCLUSIVE = 0x00000005, - TA_PERF_SEL_image_sampler_has_dr_instructions__GFX103PLUSEXCLUSIVE = 0x00000006, - TA_PERF_SEL_image_sampler_1_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000010, - TA_PERF_SEL_image_sampler_2_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000011, - TA_PERF_SEL_image_sampler_3_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000012, - 
TA_PERF_SEL_image_sampler_4_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000013, - TA_PERF_SEL_image_sampler_5_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000014, - TA_PERF_SEL_image_sampler_6_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000015, - TA_PERF_SEL_image_sampler_7_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000016, - TA_PERF_SEL_image_sampler_8_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000017, - TA_PERF_SEL_image_sampler_9_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000018, - TA_PERF_SEL_image_sampler_10_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000019, - TA_PERF_SEL_image_sampler_11_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000001a, - TA_PERF_SEL_image_sampler_12_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000001b, - TA_PERF_SEL_image_sampler_has_t_instructions__GFX103PLUSEXCLUSIVE = 0x0000001c, - TA_PERF_SEL_image_sampler_has_r_instructions__GFX103PLUSEXCLUSIVE = 0x0000001d, - TA_PERF_SEL_image_sampler_has_q_instructions__GFX103PLUSEXCLUSIVE = 0x0000001e, - TA_PERF_SEL_image_store_wavefronts__GFX103PLUSEXCLUSIVE = 0x00000026, - TA_PERF_SEL_buffer_load_wavefronts__GFX103PLUSEXCLUSIVE = 0x0000002d, - TA_PERF_SEL_buffer_store_wavefronts__GFX103PLUSEXCLUSIVE = 0x0000002e, - TA_PERF_SEL_buffer_1_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000032, - TA_PERF_SEL_buffer_2_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000033, - TA_PERF_SEL_buffer_has_index_instructions__GFX103PLUSEXCLUSIVE = 0x00000034, - TA_PERF_SEL_buffer_has_offset_instructions__GFX103PLUSEXCLUSIVE = 0x00000035, - TA_PERF_SEL_image_sampler_wavefronts__GFX103PLUSEXCLUSIVE = 0x00000038, - TA_PERF_SEL_color_1_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000040, - TA_PERF_SEL_color_2_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000041, - TA_PERF_SEL_color_3_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000042, - TA_PERF_SEL_mip_1_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000044, - 
TA_PERF_SEL_mip_2_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000045, - TA_PERF_SEL_vol_1_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000046, - TA_PERF_SEL_vol_2_cycle_quads__GFX103PLUSEXCLUSIVE = 0x00000047, - TA_PERF_SEL_sampler_op_quads__GFX103PLUSEXCLUSIVE = 0x00000048, - TA_PERF_SEL_store_write_data_input_cycles__GFX103PLUSEXCLUSIVE = 0x00000062, - TA_PERF_SEL_store_write_data_output_cycles__GFX103PLUSEXCLUSIVE = 0x00000063, - TA_PERF_SEL_flat_load_wavefronts__GFX103PLUSEXCLUSIVE = 0x00000065, - TA_PERF_SEL_flat_store_wavefronts__GFX103PLUSEXCLUSIVE = 0x00000066, - TA_PERF_SEL_flat_1_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000068, - TA_PERF_SEL_register_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x00000069, - TA_PERF_SEL_non_harvestable_clk_enabled_cycles__GFX103PLUSEXCLUSIVE = 0x0000006a, - TA_PERF_SEL_harvestable_clk_enabled_cycles__GFX103PLUSEXCLUSIVE = 0x0000006b, - TA_PERF_SEL_boundary_non_harvestable_clk_enabled_cycles__GFX103PLUSEXCLUSIVE = 0x0000006d, - TA_PERF_SEL_boundary_harvestable_clk_enabled_cycles__GFX103PLUSEXCLUSIVE = 0x0000006e, - TA_PERF_SEL_store_2_write_data_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000072, - TA_PERF_SEL_store_3_write_data_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000073, - TA_PERF_SEL_store_4_write_data_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x00000074, - TA_PERF_SEL_store_has_x_instructions__GFX103PLUSEXCLUSIVE = 0x00000075, - TA_PERF_SEL_store_has_y_instructions__GFX103PLUSEXCLUSIVE = 0x00000076, - TA_PERF_SEL_store_has_z_instructions__GFX103PLUSEXCLUSIVE = 0x00000077, - TA_PERF_SEL_store_has_w_instructions__GFX103PLUSEXCLUSIVE = 0x00000078, - TA_PERF_SEL_image_nosampler_has_t_instructions__GFX103PLUSEXCLUSIVE = 0x00000079, - TA_PERF_SEL_image_nosampler_has_r_instructions__GFX103PLUSEXCLUSIVE = 0x0000007a, - TA_PERF_SEL_image_nosampler_has_q_instructions__GFX103PLUSEXCLUSIVE = 0x0000007b, - TA_PERF_SEL_image_nosampler_1_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000007c, - 
TA_PERF_SEL_image_nosampler_2_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000007d, - TA_PERF_SEL_image_nosampler_3_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000007e, - TA_PERF_SEL_image_nosampler_4_address_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x0000007f, - TA_PERF_SEL_latency_ram_weights_written_cycles__GFX103PLUSEXCLUSIVE = 0x0000009a, - TA_PERF_SEL_latency_ram_ws_required_quads__GFX103PLUSEXCLUSIVE = 0x0000009b, - TA_PERF_SEL_latency_ram_whv_required_quads__GFX103PLUSEXCLUSIVE = 0x0000009c, - TA_PERF_SEL_latency_ram_ws_required_instructions__GFX103PLUSEXCLUSIVE = 0x0000009d, - TA_PERF_SEL_latency_ram_whv_required_instructions__GFX103PLUSEXCLUSIVE = 0x0000009e, - TA_PERF_SEL_latency_ram_ref_required_instructions__GFX103PLUSEXCLUSIVE = 0x0000009f, - TA_PERF_SEL_atomic_2_write_data_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000a2, - TA_PERF_SEL_atomic_4_write_data_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000a3, - TA_PERF_SEL_atomic_write_data_input_cycles__GFX103PLUSEXCLUSIVE = 0x000000a4, - TA_PERF_SEL_atomic_write_data_output_cycles__GFX103PLUSEXCLUSIVE = 0x000000a5, - TA_PERF_SEL_num_unlit_nodes_ta_opt__GFX103PLUSEXCLUSIVE = 0x000000ad, - TA_PERF_SEL_image_bvh_8_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000bc, - TA_PERF_SEL_image_bvh_9_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000bd, - TA_PERF_SEL_image_bvh_11_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000be, - TA_PERF_SEL_image_bvh_12_input_vgpr_instructions__GFX103PLUSEXCLUSIVE = 0x000000bf, - TA_PERF_SEL_image_sampler_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c0, - TA_PERF_SEL_image_sampler_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c1, - TA_PERF_SEL_image_sampler_4to7_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c2, - TA_PERF_SEL_image_sampler_ge8_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c3, - TA_PERF_SEL_image_linked_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c4, - TA_PERF_SEL_image_linked_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c5, 
- TA_PERF_SEL_image_linked_4to7_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c6, - TA_PERF_SEL_image_linked_ge8_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c7, - TA_PERF_SEL_image_bvh_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c8, - TA_PERF_SEL_image_bvh_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000c9, - TA_PERF_SEL_image_bvh_4to7_op_burst__GFX103PLUSEXCLUSIVE = 0x000000ca, - TA_PERF_SEL_image_bvh_ge8_op_burst__GFX103PLUSEXCLUSIVE = 0x000000cb, - TA_PERF_SEL_image_nosampler_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000cc, - TA_PERF_SEL_image_nosampler_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000cd, - TA_PERF_SEL_image_nosampler_4to31_op_burst__GFX103PLUSEXCLUSIVE = 0x000000ce, - TA_PERF_SEL_image_nosampler_ge32_op_burst__GFX103PLUSEXCLUSIVE = 0x000000cf, - TA_PERF_SEL_buffer_flat_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d0, - TA_PERF_SEL_buffer_flat_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d1, - TA_PERF_SEL_buffer_flat_4to31_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d2, - TA_PERF_SEL_buffer_flat_ge32_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d3, - TA_PERF_SEL_write_1_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d4, - TA_PERF_SEL_write_2to3_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d5, - TA_PERF_SEL_write_4to31_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d6, - TA_PERF_SEL_write_ge32_op_burst__GFX103PLUSEXCLUSIVE = 0x000000d7, - TA_PERF_SEL_ibubble_1_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000d8, - TA_PERF_SEL_ibubble_2to3_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000d9, - TA_PERF_SEL_ibubble_4to15_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000da, - TA_PERF_SEL_ibubble_16to31_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000db, - TA_PERF_SEL_ibubble_32to63_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000dc, - TA_PERF_SEL_ibubble_ge64_cycle_burst__GFX103PLUSEXCLUSIVE = 0x000000dd, - TA_PERF_SEL_sampler_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e0, - TA_PERF_SEL_nonsampler_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e1, - TA_PERF_SEL_buffer_flat_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e2, 
- TA_PERF_SEL_write_data_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e3, - TA_PERF_SEL_gradient_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e4, - TA_PERF_SEL_lod_aniso_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e5, - TA_PERF_SEL_sampler_addressing_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e6, - TA_PERF_SEL_sync_sampler_sstate_fifo_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e7, - TA_PERF_SEL_sync_sampler_cstate_fifo_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e8, - TA_PERF_SEL_sync_nonsampler_fifo_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000e9, - TA_PERF_SEL_aligner_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000ea, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE = 0x000000eb, - TA_PERF_SEL_RESERVED_161__GFX10CORE = 0x000000a1, - TA_PERF_SEL_RESERVED_170__GFX10CORE = 0x000000aa, - TA_PERF_SEL_RESERVED_171__GFX10CORE = 0x000000ab, - TA_PERF_SEL_RESERVED_172__GFX10CORE = 0x000000ac, - TA_PERF_SEL_image_sampler_total_cycles__GFX10PLUS = 0x00000028, - TA_PERF_SEL_image_nosampler_total_cycles__GFX10PLUS = 0x00000029, - TA_PERF_SEL_flat_total_cycles__GFX10PLUS = 0x0000002a, - TA_PERF_SEL_in_busy__GFX10PLUS = 0x00000080, - TA_PERF_SEL_in_fifos_busy__GFX10PLUS = 0x00000081, - TA_PERF_SEL_in_cfifo_busy__GFX10PLUS = 0x00000082, - TA_PERF_SEL_in_qfifo_busy__GFX10PLUS = 0x00000083, - TA_PERF_SEL_in_wfifo_busy__GFX10PLUS = 0x00000084, - TA_PERF_SEL_in_rfifo_busy__GFX10PLUS = 0x00000085, - TA_PERF_SEL_bf_busy__GFX10PLUS = 0x00000086, - TA_PERF_SEL_ns_busy__GFX10PLUS = 0x00000087, - TA_PERF_SEL_smp_busy_ns_idle__GFX10PLUS = 0x00000088, - TA_PERF_SEL_smp_idle_ns_busy__GFX10PLUS = 0x00000089, - TA_PERF_SEL_vmemcmd_cycles__GFX10PLUS = 0x00000090, - TA_PERF_SEL_vmemreq_cycles__GFX10PLUS = 0x00000091, - TA_PERF_SEL_in_waiting_on_req_cycles__GFX10PLUS = 0x00000092, - TA_PERF_SEL_in_addr_cycles__GFX10PLUS = 0x00000096, - TA_PERF_SEL_in_data_cycles__GFX10PLUS = 0x00000097, - TA_PERF_SEL_point_sampled_quads__GFX10PLUS = 0x000000a0, -} 
TA_PERFCOUNT_SEL; - -constexpr unsigned int MaxTaPerfcountSelGfx09 = TA_PERF_SEL_first_xnack_on_phase3__GFX09; -constexpr unsigned int MaxTaPerfcountSelGfx101 = TA_PERF_SEL_nosample_path_cycles__GFX101; -constexpr unsigned int MaxTaPerfcountSelGfx103PlusExclusive = TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE; - -typedef enum TA_TC_ADDR_MODES { - TA_TC_ADDR_MODE_DEFAULT = 0x00000000, - TA_TC_ADDR_MODE_COMP0 = 0x00000001, - TA_TC_ADDR_MODE_COMP1 = 0x00000002, - TA_TC_ADDR_MODE_COMP2 = 0x00000003, - TA_TC_ADDR_MODE_COMP3 = 0x00000004, - TA_TC_ADDR_MODE_UNALIGNED = 0x00000005, - TA_TC_ADDR_MODE_BORDER_COLOR = 0x00000006, -} TA_TC_ADDR_MODES; - -typedef enum TA_TC_REQ_MODES { - TA_TC_REQ_MODE_BORDER = 0x00000000, - TA_TC_REQ_MODE_TEX2 = 0x00000001, - TA_TC_REQ_MODE_TEX1 = 0x00000002, - TA_TC_REQ_MODE_TEX0 = 0x00000003, - TA_TC_REQ_MODE_NORMAL = 0x00000004, - TA_TC_REQ_MODE_DWORD = 0x00000005, - TA_TC_REQ_MODE_BYTE = 0x00000006, - TA_TC_REQ_MODE_BYTE_NV = 0x00000007, -} TA_TC_REQ_MODES; - -typedef enum TCA_PERF_SEL { - TCA_PERF_SEL_NONE = 0x00000000, - TCA_PERF_SEL_CYCLE = 0x00000001, - TCA_PERF_SEL_BUSY = 0x00000002, - TCA_PERF_SEL_FORCED_HOLE_TCC0 = 0x00000003, - TCA_PERF_SEL_FORCED_HOLE_TCC1 = 0x00000004, - TCA_PERF_SEL_FORCED_HOLE_TCC2 = 0x00000005, - TCA_PERF_SEL_FORCED_HOLE_TCC3 = 0x00000006, - TCA_PERF_SEL_FORCED_HOLE_TCC4 = 0x00000007, - TCA_PERF_SEL_FORCED_HOLE_TCC5 = 0x00000008, - TCA_PERF_SEL_FORCED_HOLE_TCC6 = 0x00000009, - TCA_PERF_SEL_FORCED_HOLE_TCC7 = 0x0000000a, - TCA_PERF_SEL_REQ_TCC0 = 0x0000000b, - TCA_PERF_SEL_REQ_TCC1 = 0x0000000c, - TCA_PERF_SEL_REQ_TCC2 = 0x0000000d, - TCA_PERF_SEL_REQ_TCC3 = 0x0000000e, - TCA_PERF_SEL_REQ_TCC4 = 0x0000000f, - TCA_PERF_SEL_REQ_TCC5 = 0x00000010, - TCA_PERF_SEL_REQ_TCC6 = 0x00000011, - TCA_PERF_SEL_REQ_TCC7 = 0x00000012, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC0 = 0x00000013, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC1 = 0x00000014, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC2 = 0x00000015, - 
TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC3 = 0x00000016, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC4 = 0x00000017, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC5 = 0x00000018, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC6 = 0x00000019, - TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC7 = 0x0000001a, - TCA_PERF_SEL_CROSSBAR_STALL_TCC0 = 0x0000001b, - TCA_PERF_SEL_CROSSBAR_STALL_TCC1 = 0x0000001c, - TCA_PERF_SEL_CROSSBAR_STALL_TCC2 = 0x0000001d, - TCA_PERF_SEL_CROSSBAR_STALL_TCC3 = 0x0000001e, - TCA_PERF_SEL_CROSSBAR_STALL_TCC4 = 0x0000001f, - TCA_PERF_SEL_CROSSBAR_STALL_TCC5 = 0x00000020, - TCA_PERF_SEL_CROSSBAR_STALL_TCC6 = 0x00000021, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7 = 0x00000022, -} TCA_PERF_SEL; - -constexpr unsigned int MaxTcaPerfSel = TCA_PERF_SEL_CROSSBAR_STALL_TCC7; - -typedef enum TCC_CACHE_POLICIES { - TCC_CACHE_POLICY_LRU = 0x00000000, - TCC_CACHE_POLICY_STREAM = 0x00000001, -} TCC_CACHE_POLICIES; - -typedef enum TCC_MTYPE { - MTYPE_NC = 0x00000000, - MTYPE_WC = 0x00000001, - MTYPE_CC = 0x00000002, -} TCC_MTYPE; - -typedef enum TCC_PERF_SEL { - TCC_PERF_SEL_NONE = 0x00000000, - TCC_PERF_SEL_CYCLE = 0x00000001, - TCC_PERF_SEL_BUSY = 0x00000002, - TCC_PERF_SEL_REQ = 0x00000003, - TCC_PERF_SEL_STREAMING_REQ = 0x00000004, - TCC_PERF_SEL_COMPRESSED_REQ = 0x00000005, - TCC_PERF_SEL_COMPRESSED_0_REQ = 0x00000006, - TCC_PERF_SEL_COMPRESSED_32_REQ = 0x00000007, - TCC_PERF_SEL_METADATA_REQ = 0x00000008, - TCC_PERF_SEL_NC_REQ = 0x00000009, - TCC_PERF_SEL_UC_REQ = 0x0000000a, - TCC_PERF_SEL_CC_REQ = 0x0000000b, - TCC_PERF_SEL_PROBE = 0x0000000c, - TCC_PERF_SEL_PROBE_ALL = 0x0000000d, - TCC_PERF_SEL_INTERNAL_PROBE = 0x0000000e, - TCC_PERF_SEL_READ = 0x0000000f, - TCC_PERF_SEL_WRITE = 0x00000010, - TCC_PERF_SEL_ATOMIC = 0x00000011, - TCC_PERF_SEL_NOP_ACK = 0x00000012, - TCC_PERF_SEL_NOP_RTN0 = 0x00000013, - TCC_PERF_SEL_HIT = 0x00000014, - TCC_PERF_SEL_SECTOR_HIT = 0x00000015, - TCC_PERF_SEL_MISS = 0x00000016, - TCC_PERF_SEL_DEWRITE_ALLOCATE_HIT = 0x00000017, - TCC_PERF_SEL_FULLY_WRITTEN_HIT = 
0x00000018, - TCC_PERF_SEL_WRITEBACK = 0x00000019, - TCC_PERF_SEL_LATENCY_FIFO_FULL = 0x0000001a, - TCC_PERF_SEL_SRC_FIFO_FULL = 0x0000001b, - TCC_PERF_SEL_HOLE_FIFO_FULL = 0x0000001c, - TCC_PERF_SEL_TOO_MANY_EA_WRREQS_STALL = 0x00000025, - TCC_PERF_SEL_TAG_STALL = 0x00000032, - TCC_PERF_SEL_TAG_WRITEBACK_FIFO_FULL_STALL = 0x00000033, - TCC_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL = 0x00000034, - TCC_PERF_SEL_TAG_UNCACHED_WRITE_ATOMIC_FIFO_FULL_STALL = 0x00000035, - TCC_PERF_SEL_TAG_NO_UNCACHED_WRITE_ATOMIC_ENTRIES_STALL = 0x00000036, - TCC_PERF_SEL_TAG_PROBE_STALL = 0x00000037, - TCC_PERF_SEL_TAG_PROBE_FILTER_STALL = 0x00000038, - TCC_PERF_SEL_TAG_PROBE_FIFO_FULL_STALL = 0x00000039, - TCC_PERF_SEL_READ_RETURN_TIMEOUT = 0x0000003a, - TCC_PERF_SEL_WRITEBACK_READ_TIMEOUT = 0x0000003b, - TCC_PERF_SEL_READ_RETURN_FULL_BUBBLE = 0x0000003c, - TCC_PERF_SEL_BUBBLE = 0x0000003d, - TCC_PERF_SEL_RETURN_ACK = 0x0000003e, - TCC_PERF_SEL_RETURN_DATA = 0x0000003f, - TCC_PERF_SEL_RETURN_HOLE = 0x00000040, - TCC_PERF_SEL_RETURN_ACK_HOLE = 0x00000041, - TCC_PERF_SEL_IB_REQ = 0x00000042, - TCC_PERF_SEL_IB_STALL = 0x00000043, - TCC_PERF_SEL_IB_TAG_STALL = 0x00000044, - TCC_PERF_SEL_CLIENT0_REQ = 0x00000080, - TCC_PERF_SEL_CLIENT1_REQ = 0x00000081, - TCC_PERF_SEL_CLIENT2_REQ = 0x00000082, - TCC_PERF_SEL_CLIENT3_REQ = 0x00000083, - TCC_PERF_SEL_CLIENT4_REQ = 0x00000084, - TCC_PERF_SEL_CLIENT5_REQ = 0x00000085, - TCC_PERF_SEL_CLIENT6_REQ = 0x00000086, - TCC_PERF_SEL_CLIENT7_REQ = 0x00000087, - TCC_PERF_SEL_CLIENT8_REQ = 0x00000088, - TCC_PERF_SEL_CLIENT9_REQ = 0x00000089, - TCC_PERF_SEL_CLIENT10_REQ = 0x0000008a, - TCC_PERF_SEL_CLIENT11_REQ = 0x0000008b, - TCC_PERF_SEL_CLIENT12_REQ = 0x0000008c, - TCC_PERF_SEL_CLIENT13_REQ = 0x0000008d, - TCC_PERF_SEL_CLIENT14_REQ = 0x0000008e, - TCC_PERF_SEL_CLIENT15_REQ = 0x0000008f, - TCC_PERF_SEL_CLIENT16_REQ = 0x00000090, - TCC_PERF_SEL_CLIENT17_REQ = 0x00000091, - TCC_PERF_SEL_CLIENT18_REQ = 0x00000092, - TCC_PERF_SEL_CLIENT19_REQ = 0x00000093, 
- TCC_PERF_SEL_CLIENT20_REQ = 0x00000094, - TCC_PERF_SEL_CLIENT21_REQ = 0x00000095, - TCC_PERF_SEL_CLIENT22_REQ = 0x00000096, - TCC_PERF_SEL_CLIENT23_REQ = 0x00000097, - TCC_PERF_SEL_CLIENT24_REQ = 0x00000098, - TCC_PERF_SEL_CLIENT25_REQ = 0x00000099, - TCC_PERF_SEL_CLIENT26_REQ = 0x0000009a, - TCC_PERF_SEL_CLIENT27_REQ = 0x0000009b, - TCC_PERF_SEL_CLIENT28_REQ = 0x0000009c, - TCC_PERF_SEL_CLIENT29_REQ = 0x0000009d, - TCC_PERF_SEL_CLIENT30_REQ = 0x0000009e, - TCC_PERF_SEL_CLIENT31_REQ = 0x0000009f, - TCC_PERF_SEL_CLIENT32_REQ = 0x000000a0, - TCC_PERF_SEL_CLIENT33_REQ = 0x000000a1, - TCC_PERF_SEL_CLIENT34_REQ = 0x000000a2, - TCC_PERF_SEL_CLIENT35_REQ = 0x000000a3, - TCC_PERF_SEL_CLIENT36_REQ = 0x000000a4, - TCC_PERF_SEL_CLIENT37_REQ = 0x000000a5, - TCC_PERF_SEL_CLIENT38_REQ = 0x000000a6, - TCC_PERF_SEL_CLIENT39_REQ = 0x000000a7, - TCC_PERF_SEL_CLIENT40_REQ = 0x000000a8, - TCC_PERF_SEL_CLIENT41_REQ = 0x000000a9, - TCC_PERF_SEL_CLIENT42_REQ = 0x000000aa, - TCC_PERF_SEL_CLIENT43_REQ = 0x000000ab, - TCC_PERF_SEL_CLIENT44_REQ = 0x000000ac, - TCC_PERF_SEL_CLIENT45_REQ = 0x000000ad, - TCC_PERF_SEL_CLIENT46_REQ = 0x000000ae, - TCC_PERF_SEL_CLIENT47_REQ = 0x000000af, - TCC_PERF_SEL_CLIENT48_REQ = 0x000000b0, - TCC_PERF_SEL_CLIENT49_REQ = 0x000000b1, - TCC_PERF_SEL_CLIENT50_REQ = 0x000000b2, - TCC_PERF_SEL_CLIENT51_REQ = 0x000000b3, - TCC_PERF_SEL_CLIENT52_REQ = 0x000000b4, - TCC_PERF_SEL_CLIENT53_REQ = 0x000000b5, - TCC_PERF_SEL_CLIENT54_REQ = 0x000000b6, - TCC_PERF_SEL_CLIENT55_REQ = 0x000000b7, - TCC_PERF_SEL_CLIENT56_REQ = 0x000000b8, - TCC_PERF_SEL_CLIENT57_REQ = 0x000000b9, - TCC_PERF_SEL_CLIENT58_REQ = 0x000000ba, - TCC_PERF_SEL_CLIENT59_REQ = 0x000000bb, - TCC_PERF_SEL_CLIENT60_REQ = 0x000000bc, - TCC_PERF_SEL_CLIENT61_REQ = 0x000000bd, - TCC_PERF_SEL_CLIENT62_REQ = 0x000000be, - TCC_PERF_SEL_CLIENT63_REQ = 0x000000bf, - TCC_PERF_SEL_CLIENT64_REQ = 0x000000c0, - TCC_PERF_SEL_CLIENT65_REQ = 0x000000c1, - TCC_PERF_SEL_CLIENT66_REQ = 0x000000c2, - 
TCC_PERF_SEL_CLIENT67_REQ = 0x000000c3, - TCC_PERF_SEL_CLIENT68_REQ = 0x000000c4, - TCC_PERF_SEL_CLIENT69_REQ = 0x000000c5, - TCC_PERF_SEL_CLIENT70_REQ = 0x000000c6, - TCC_PERF_SEL_CLIENT71_REQ = 0x000000c7, - TCC_PERF_SEL_CLIENT72_REQ = 0x000000c8, - TCC_PERF_SEL_CLIENT73_REQ = 0x000000c9, - TCC_PERF_SEL_CLIENT74_REQ = 0x000000ca, - TCC_PERF_SEL_CLIENT75_REQ = 0x000000cb, - TCC_PERF_SEL_CLIENT76_REQ = 0x000000cc, - TCC_PERF_SEL_CLIENT77_REQ = 0x000000cd, - TCC_PERF_SEL_CLIENT78_REQ = 0x000000ce, - TCC_PERF_SEL_CLIENT79_REQ = 0x000000cf, - TCC_PERF_SEL_CLIENT80_REQ = 0x000000d0, - TCC_PERF_SEL_CLIENT81_REQ = 0x000000d1, - TCC_PERF_SEL_CLIENT82_REQ = 0x000000d2, - TCC_PERF_SEL_CLIENT83_REQ = 0x000000d3, - TCC_PERF_SEL_CLIENT84_REQ = 0x000000d4, - TCC_PERF_SEL_CLIENT85_REQ = 0x000000d5, - TCC_PERF_SEL_CLIENT86_REQ = 0x000000d6, - TCC_PERF_SEL_CLIENT87_REQ = 0x000000d7, - TCC_PERF_SEL_CLIENT88_REQ = 0x000000d8, - TCC_PERF_SEL_CLIENT89_REQ = 0x000000d9, - TCC_PERF_SEL_CLIENT90_REQ = 0x000000da, - TCC_PERF_SEL_CLIENT91_REQ = 0x000000db, - TCC_PERF_SEL_CLIENT92_REQ = 0x000000dc, - TCC_PERF_SEL_CLIENT93_REQ = 0x000000dd, - TCC_PERF_SEL_CLIENT94_REQ = 0x000000de, - TCC_PERF_SEL_CLIENT95_REQ = 0x000000df, - TCC_PERF_SEL_CLIENT96_REQ = 0x000000e0, - TCC_PERF_SEL_CLIENT97_REQ = 0x000000e1, - TCC_PERF_SEL_CLIENT98_REQ = 0x000000e2, - TCC_PERF_SEL_CLIENT99_REQ = 0x000000e3, - TCC_PERF_SEL_CLIENT100_REQ = 0x000000e4, - TCC_PERF_SEL_CLIENT101_REQ = 0x000000e5, - TCC_PERF_SEL_CLIENT102_REQ = 0x000000e6, - TCC_PERF_SEL_CLIENT103_REQ = 0x000000e7, - TCC_PERF_SEL_CLIENT104_REQ = 0x000000e8, - TCC_PERF_SEL_CLIENT105_REQ = 0x000000e9, - TCC_PERF_SEL_CLIENT106_REQ = 0x000000ea, - TCC_PERF_SEL_CLIENT107_REQ = 0x000000eb, - TCC_PERF_SEL_CLIENT108_REQ = 0x000000ec, - TCC_PERF_SEL_CLIENT109_REQ = 0x000000ed, - TCC_PERF_SEL_CLIENT110_REQ = 0x000000ee, - TCC_PERF_SEL_CLIENT111_REQ = 0x000000ef, - TCC_PERF_SEL_CLIENT112_REQ = 0x000000f0, - TCC_PERF_SEL_CLIENT113_REQ = 0x000000f1, - 
TCC_PERF_SEL_CLIENT114_REQ = 0x000000f2, - TCC_PERF_SEL_CLIENT115_REQ = 0x000000f3, - TCC_PERF_SEL_CLIENT116_REQ = 0x000000f4, - TCC_PERF_SEL_CLIENT117_REQ = 0x000000f5, - TCC_PERF_SEL_CLIENT118_REQ = 0x000000f6, - TCC_PERF_SEL_CLIENT119_REQ = 0x000000f7, - TCC_PERF_SEL_CLIENT120_REQ = 0x000000f8, - TCC_PERF_SEL_CLIENT121_REQ = 0x000000f9, - TCC_PERF_SEL_CLIENT122_REQ = 0x000000fa, - TCC_PERF_SEL_CLIENT123_REQ = 0x000000fb, - TCC_PERF_SEL_CLIENT124_REQ = 0x000000fc, - TCC_PERF_SEL_CLIENT125_REQ = 0x000000fd, - TCC_PERF_SEL_CLIENT126_REQ = 0x000000fe, - TCC_PERF_SEL_CLIENT127_REQ = 0x000000ff, - TCC_PERF_SEL_EA_RDREQ_DRAM__RV2X = 0x00000076, - TCC_PERF_SEL_EA_WRREQ_DRAM__RV2X = 0x00000077, - TCC_PERF_SEL_EA_WRREQ__VG10_RV1X_RV2X = 0x0000001d, - TCC_PERF_SEL_EA_WRREQ_64B__VG10_RV1X_RV2X = 0x0000001e, - TCC_PERF_SEL_EA_WRREQ_PROBE_COMMAND__VG10_RV1X_RV2X = 0x0000001f, - TCC_PERF_SEL_EA_WR_UNCACHED_32B__VG10_RV1X_RV2X = 0x00000020, - TCC_PERF_SEL_EA_WRREQ_STALL__VG10_RV1X_RV2X = 0x00000021, - TCC_PERF_SEL_EA_WRREQ_IO_CREDIT_STALL__VG10_RV1X_RV2X = 0x00000022, - TCC_PERF_SEL_EA_WRREQ_GMI_CREDIT_STALL__VG10_RV1X_RV2X = 0x00000023, - TCC_PERF_SEL_EA_WRREQ_DRAM_CREDIT_STALL__VG10_RV1X_RV2X = 0x00000024, - TCC_PERF_SEL_EA_WRREQ_LEVEL__VG10_RV1X_RV2X = 0x00000026, - TCC_PERF_SEL_EA_ATOMIC__VG10_RV1X_RV2X = 0x00000027, - TCC_PERF_SEL_EA_ATOMIC_LEVEL__VG10_RV1X_RV2X = 0x00000028, - TCC_PERF_SEL_EA_RDREQ__VG10_RV1X_RV2X = 0x00000029, - TCC_PERF_SEL_EA_RDREQ_32B__VG10_RV1X_RV2X = 0x0000002a, - TCC_PERF_SEL_EA_RD_UNCACHED_32B__VG10_RV1X_RV2X = 0x0000002b, - TCC_PERF_SEL_EA_RD_MDC_32B__VG10_RV1X_RV2X = 0x0000002c, - TCC_PERF_SEL_EA_RD_COMPRESSED_32B__VG10_RV1X_RV2X = 0x0000002d, - TCC_PERF_SEL_EA_RDREQ_IO_CREDIT_STALL__VG10_RV1X_RV2X = 0x0000002e, - TCC_PERF_SEL_EA_RDREQ_GMI_CREDIT_STALL__VG10_RV1X_RV2X = 0x0000002f, - TCC_PERF_SEL_EA_RDREQ_DRAM_CREDIT_STALL__VG10_RV1X_RV2X = 0x00000030, - TCC_PERF_SEL_EA_RDREQ_LEVEL__VG10_RV1X_RV2X = 0x00000031, - 
TCC_PERF_SEL_EA_RDRET_NACK__VG10_RV1X_RV2X = 0x00000048, - TCC_PERF_SEL_EA_WRRET_NACK__VG10_RV1X_RV2X = 0x00000049, - TCC_PERF_SEL_IB_MDC_STALL__VG10_VG12_RV1X_RV2X_RN = 0x00000045, - TCC_PERF_SEL_TCA_LEVEL__VG10_VG12_RV1X_RV2X_RN = 0x00000046, - TCC_PERF_SEL_HOLE_LEVEL__VG10_VG12_RV1X_RV2X_RN = 0x00000047, - TCC_PERF_SEL_NORMAL_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004a, - TCC_PERF_SEL_TC_OP_WBL2_NC_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004b, - TCC_PERF_SEL_TC_OP_WBINVL2_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004c, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004d, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004e, - TCC_PERF_SEL_ALL_TC_OP_WB_WRITEBACK__VG10_VG12_RV1X_RV2X_RN = 0x0000004f, - TCC_PERF_SEL_NORMAL_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000050, - TCC_PERF_SEL_TC_OP_WBL2_NC_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000051, - TCC_PERF_SEL_TC_OP_INVL2_NC_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000052, - TCC_PERF_SEL_TC_OP_WBINVL2_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000053, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000054, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000055, - TCC_PERF_SEL_ALL_TC_OP_INV_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000056, - TCC_PERF_SEL_PROBE_EVICT__VG10_VG12_RV1X_RV2X_RN = 0x00000057, - TCC_PERF_SEL_TC_OP_WBL2_NC_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x00000058, - TCC_PERF_SEL_TC_OP_INVL2_NC_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x00000059, - TCC_PERF_SEL_TC_OP_WBINVL2_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x0000005a, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x0000005b, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x0000005c, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_CYCLE__VG10_VG12_RV1X_RV2X_RN = 0x0000005d, - TCC_PERF_SEL_TC_OP_WBL2_NC_START__VG10_VG12_RV1X_RV2X_RN = 0x0000005e, - TCC_PERF_SEL_TC_OP_INVL2_NC_START__VG10_VG12_RV1X_RV2X_RN = 0x0000005f, - 
TCC_PERF_SEL_TC_OP_WBINVL2_START__VG10_VG12_RV1X_RV2X_RN = 0x00000060, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_START__VG10_VG12_RV1X_RV2X_RN = 0x00000061, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_START__VG10_VG12_RV1X_RV2X_RN = 0x00000062, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_START__VG10_VG12_RV1X_RV2X_RN = 0x00000063, - TCC_PERF_SEL_TC_OP_WBL2_NC_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000064, - TCC_PERF_SEL_TC_OP_INVL2_NC_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000065, - TCC_PERF_SEL_TC_OP_WBINVL2_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000066, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000067, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000068, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_FINISH__VG10_VG12_RV1X_RV2X_RN = 0x00000069, - TCC_PERF_SEL_TC_OP_INV_METADATA__VG10_VG12_RV1X_RV2X_RN = 0x0000006a, - TCC_PERF_SEL_MDC_REQ__VG10_VG12_RV1X_RV2X_RN = 0x0000006b, - TCC_PERF_SEL_MDC_LEVEL__VG10_VG12_RV1X_RV2X_RN = 0x0000006c, - TCC_PERF_SEL_MDC_TAG_HIT__VG10_VG12_RV1X_RV2X_RN = 0x0000006d, - TCC_PERF_SEL_MDC_SECTOR_HIT__VG10_VG12_RV1X_RV2X_RN = 0x0000006e, - TCC_PERF_SEL_MDC_SECTOR_MISS__VG10_VG12_RV1X_RV2X_RN = 0x0000006f, - TCC_PERF_SEL_MDC_TAG_STALL__VG10_VG12_RV1X_RV2X_RN = 0x00000070, - TCC_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL__VG10_VG12_RV1X_RV2X_RN = 0x00000071, - TCC_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL__VG10_VG12_RV1X_RV2X_RN = 0x00000072, - TCC_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL__VG10_VG12_RV1X_RV2X_RN = 0x00000073, - TCC_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION__VG10_VG12_RV1X_RV2X_RN = 0x00000074, - TCC_PERF_SEL_PROBE_FILTER_DISABLED__VG10_VG12_RV1X_RV2X_RN = 0x00000075, - TCC_PERF_SEL_EA0_RDRET_NACK__VG12_RN = 0x00000048, - TCC_PERF_SEL_EA0_WRRET_NACK__VG12_RN = 0x00000049, - TCC_PERF_SEL_EA0_RDREQ_DRAM__VG12_RN = 0x00000076, - TCC_PERF_SEL_EA0_WRREQ_DRAM__VG12_RN = 0x00000077, - TCC_PERF_SEL_EA0_WRREQ__VG12_VG20_RN = 0x0000001d, - TCC_PERF_SEL_EA0_WRREQ_64B__VG12_VG20_RN = 0x0000001e, - 
TCC_PERF_SEL_EA0_WRREQ_PROBE_COMMAND__VG12_VG20_RN = 0x0000001f, - TCC_PERF_SEL_EA0_WR_UNCACHED_32B__VG12_VG20_RN = 0x00000020, - TCC_PERF_SEL_EA0_WRREQ_STALL__VG12_VG20_RN = 0x00000021, - TCC_PERF_SEL_EA0_WRREQ_IO_CREDIT_STALL__VG12_VG20_RN = 0x00000022, - TCC_PERF_SEL_EA0_WRREQ_GMI_CREDIT_STALL__VG12_VG20_RN = 0x00000023, - TCC_PERF_SEL_EA0_WRREQ_DRAM_CREDIT_STALL__VG12_VG20_RN = 0x00000024, - TCC_PERF_SEL_EA0_WRREQ_LEVEL__VG12_VG20_RN = 0x00000026, - TCC_PERF_SEL_EA0_ATOMIC__VG12_VG20_RN = 0x00000027, - TCC_PERF_SEL_EA0_ATOMIC_LEVEL__VG12_VG20_RN = 0x00000028, - TCC_PERF_SEL_EA0_RDREQ__VG12_VG20_RN = 0x00000029, - TCC_PERF_SEL_EA0_RDREQ_32B__VG12_VG20_RN = 0x0000002a, - TCC_PERF_SEL_EA0_RD_UNCACHED_32B__VG12_VG20_RN = 0x0000002b, - TCC_PERF_SEL_EA0_RD_MDC_32B__VG12_VG20_RN = 0x0000002c, - TCC_PERF_SEL_EA0_RD_COMPRESSED_32B__VG12_VG20_RN = 0x0000002d, - TCC_PERF_SEL_EA0_RDREQ_IO_CREDIT_STALL__VG12_VG20_RN = 0x0000002e, - TCC_PERF_SEL_EA0_RDREQ_GMI_CREDIT_STALL__VG12_VG20_RN = 0x0000002f, - TCC_PERF_SEL_EA0_RDREQ_DRAM_CREDIT_STALL__VG12_VG20_RN = 0x00000030, - TCC_PERF_SEL_EA0_RDREQ_LEVEL__VG12_VG20_RN = 0x00000031, - TCC_PERF_SEL_IB_NOT_VALID__VG20 = 0x00000045, - TCC_PERF_SEL_IB_MDC_STALL__VG20 = 0x00000046, - TCC_PERF_SEL_TCA_LEVEL__VG20 = 0x00000047, - TCC_PERF_SEL_HOLE_LEVEL__VG20 = 0x00000048, - TCC_PERF_SEL_EA0_RDRET_NACK__VG20 = 0x00000049, - TCC_PERF_SEL_EA0_WRRET_NACK__VG20 = 0x0000004a, - TCC_PERF_SEL_NORMAL_WRITEBACK__VG20 = 0x0000004b, - TCC_PERF_SEL_TC_OP_WBL2_NC_WRITEBACK__VG20 = 0x0000004c, - TCC_PERF_SEL_TC_OP_WBINVL2_WRITEBACK__VG20 = 0x0000004d, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_WRITEBACK__VG20 = 0x0000004e, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_WRITEBACK__VG20 = 0x0000004f, - TCC_PERF_SEL_ALL_TC_OP_WB_WRITEBACK__VG20 = 0x00000050, - TCC_PERF_SEL_NORMAL_EVICT__VG20 = 0x00000051, - TCC_PERF_SEL_TC_OP_WBL2_NC_EVICT__VG20 = 0x00000052, - TCC_PERF_SEL_TC_OP_INVL2_NC_EVICT__VG20 = 0x00000053, - TCC_PERF_SEL_TC_OP_WBINVL2_EVICT__VG20 = 0x00000054, - 
TCC_PERF_SEL_TC_OP_WBINVL2_NC_EVICT__VG20 = 0x00000055, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_EVICT__VG20 = 0x00000056, - TCC_PERF_SEL_ALL_TC_OP_INV_EVICT__VG20 = 0x00000057, - TCC_PERF_SEL_PROBE_EVICT__VG20 = 0x00000058, - TCC_PERF_SEL_TC_OP_WBL2_NC_CYCLE__VG20 = 0x00000059, - TCC_PERF_SEL_TC_OP_INVL2_NC_CYCLE__VG20 = 0x0000005a, - TCC_PERF_SEL_TC_OP_WBINVL2_CYCLE__VG20 = 0x0000005b, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_CYCLE__VG20 = 0x0000005c, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_CYCLE__VG20 = 0x0000005d, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_CYCLE__VG20 = 0x0000005e, - TCC_PERF_SEL_TC_OP_WBL2_NC_START__VG20 = 0x0000005f, - TCC_PERF_SEL_TC_OP_INVL2_NC_START__VG20 = 0x00000060, - TCC_PERF_SEL_TC_OP_WBINVL2_START__VG20 = 0x00000061, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_START__VG20 = 0x00000062, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_START__VG20 = 0x00000063, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_START__VG20 = 0x00000064, - TCC_PERF_SEL_TC_OP_WBL2_NC_FINISH__VG20 = 0x00000065, - TCC_PERF_SEL_TC_OP_INVL2_NC_FINISH__VG20 = 0x00000066, - TCC_PERF_SEL_TC_OP_WBINVL2_FINISH__VG20 = 0x00000067, - TCC_PERF_SEL_TC_OP_WBINVL2_NC_FINISH__VG20 = 0x00000068, - TCC_PERF_SEL_TC_OP_WBINVL2_SD_FINISH__VG20 = 0x00000069, - TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_FINISH__VG20 = 0x0000006a, - TCC_PERF_SEL_TC_OP_INV_METADATA__VG20 = 0x0000006b, - TCC_PERF_SEL_MDC_REQ__VG20 = 0x0000006c, - TCC_PERF_SEL_MDC_LEVEL__VG20 = 0x0000006d, - TCC_PERF_SEL_MDC_TAG_HIT__VG20 = 0x0000006e, - TCC_PERF_SEL_MDC_SECTOR_HIT__VG20 = 0x0000006f, - TCC_PERF_SEL_MDC_SECTOR_MISS__VG20 = 0x00000070, - TCC_PERF_SEL_MDC_TAG_STALL__VG20 = 0x00000071, - TCC_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL__VG20 = 0x00000072, - TCC_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL__VG20 = 0x00000073, - TCC_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL__VG20 = 0x00000074, - TCC_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION__VG20 = 0x00000075, - TCC_PERF_SEL_PROBE_FILTER_DISABLED__VG20 = 0x00000076, - TCC_PERF_SEL_EA0_RDREQ_DRAM__VG20 = 0x00000077, - 
TCC_PERF_SEL_EA0_WRREQ_DRAM__VG20 = 0x00000078, - TCC_PERF_SEL_EA1_WRREQ__VG20 = 0x00000100, - TCC_PERF_SEL_EA1_WRREQ_64B__VG20 = 0x00000101, - TCC_PERF_SEL_EA1_WRREQ_PROBE_COMMAND__VG20 = 0x00000102, - TCC_PERF_SEL_EA1_WR_UNCACHED_32B__VG20 = 0x00000103, - TCC_PERF_SEL_EA1_WRREQ_STALL__VG20 = 0x00000104, - TCC_PERF_SEL_EA1_WRREQ_IO_CREDIT_STALL__VG20 = 0x00000105, - TCC_PERF_SEL_EA1_WRREQ_GMI_CREDIT_STALL__VG20 = 0x00000106, - TCC_PERF_SEL_EA1_WRREQ_DRAM_CREDIT_STALL__VG20 = 0x00000107, - TCC_PERF_SEL_EA1_WRREQ_LEVEL__VG20 = 0x00000108, - TCC_PERF_SEL_EA1_ATOMIC__VG20 = 0x00000109, - TCC_PERF_SEL_EA1_ATOMIC_LEVEL__VG20 = 0x0000010a, - TCC_PERF_SEL_EA1_RDREQ__VG20 = 0x0000010b, - TCC_PERF_SEL_EA1_RDREQ_32B__VG20 = 0x0000010c, - TCC_PERF_SEL_EA1_RD_UNCACHED_32B__VG20 = 0x0000010d, - TCC_PERF_SEL_EA1_RD_MDC_32B__VG20 = 0x0000010e, - TCC_PERF_SEL_EA1_RD_COMPRESSED_32B__VG20 = 0x0000010f, - TCC_PERF_SEL_EA1_RDREQ_IO_CREDIT_STALL__VG20 = 0x00000110, - TCC_PERF_SEL_EA1_RDREQ_GMI_CREDIT_STALL__VG20 = 0x00000111, - TCC_PERF_SEL_EA1_RDREQ_DRAM_CREDIT_STALL__VG20 = 0x00000112, - TCC_PERF_SEL_EA1_RDREQ_LEVEL__VG20 = 0x00000113, - TCC_PERF_SEL_EA1_RDRET_NACK__VG20 = 0x00000114, - TCC_PERF_SEL_EA1_WRRET_NACK__VG20 = 0x00000115, - TCC_PERF_SEL_EA1_RDREQ_DRAM__VG20 = 0x00000116, - TCC_PERF_SEL_EA1_WRREQ_DRAM__VG20 = 0x00000117, - TCC_PERF_SEL_CLIENTX_LEVEL__VG20 = 0x00000118, - TCC_PERF_CLIENT_RETURN_BUBBLE__VG20 = 0x00000119, -} TCC_PERF_SEL; - -constexpr unsigned int MaxTccPerfSelVg10_Vg12_Rv1x_Rv2x_Rn = TCC_PERF_SEL_CLIENT127_REQ; -constexpr unsigned int MaxTccPerfSelVg20 = TCC_PERF_CLIENT_RETURN_BUBBLE__VG20; - -typedef enum TCP_CACHE_POLICIES { - TCP_CACHE_POLICY_MISS_LRU = 0x00000000, - TCP_CACHE_POLICY_MISS_EVICT = 0x00000001, - TCP_CACHE_POLICY_HIT_LRU = 0x00000002, - TCP_CACHE_POLICY_HIT_EVICT = 0x00000003, -} TCP_CACHE_POLICIES; - -typedef enum TCP_CACHE_STORE_POLICIES { - TCP_CACHE_STORE_POLICY_WT_LRU = 0x00000000, - TCP_CACHE_STORE_POLICY_WT_EVICT = 0x00000001, -} 
TCP_CACHE_STORE_POLICIES; - -typedef enum TCP_DSM_DATA_SEL { - TCP_DSM_DISABLE = 0x00000000, - TCP_DSM_SEL0 = 0x00000001, - TCP_DSM_SEL1 = 0x00000002, - TCP_DSM_SEL_BOTH = 0x00000003, -} TCP_DSM_DATA_SEL; - -typedef enum TCP_DSM_INJECT_SEL { - TCP_DSM_INJECT_SEL0 = 0x00000000, - TCP_DSM_INJECT_SEL1 = 0x00000001, - TCP_DSM_INJECT_SEL2 = 0x00000002, - TCP_DSM_INJECT_SEL3 = 0x00000003, -} TCP_DSM_INJECT_SEL; - -typedef enum TCP_DSM_SINGLE_WRITE { - TCP_DSM_SINGLE_WRITE_DIS = 0x00000000, - TCP_DSM_SINGLE_WRITE_EN = 0x00000001, -} TCP_DSM_SINGLE_WRITE; - -typedef enum TCP_OPCODE_TYPE { - TCP_OPCODE_READ = 0x00000000, - TCP_OPCODE_WRITE = 0x00000001, - TCP_OPCODE_ATOMIC = 0x00000002, - TCP_OPCODE_ATOMIC_CMPSWAP = 0x00000004, - TCP_OPCODE_WBINVL1__GFX09 = 0x00000003, - TCP_OPCODE_GATHERH__GFX09 = 0x00000005, - TCP_OPCODE_INV__GFX10PLUS = 0x00000003, - TCP_OPCODE_SAMPLER__GFX10PLUS = 0x00000005, - TCP_OPCODE_LOAD__GFX10PLUS = 0x00000006, - TCP_OPCODE_GATHERH__GFX10PLUS = 0x00000007, -} TCP_OPCODE_TYPE; - -typedef enum TCP_PERFCOUNT_SELECT { - TCP_PERF_SEL_GATE_EN1 = 0x00000000, - TCP_PERF_SEL_GATE_EN2 = 0x00000001, - TCP_PERF_SEL_CORE_REG_SCLK_VLD__GFX09 = 0x00000002, - TCP_PERF_SEL_TA_TCP_ADDR_STARVE_CYCLES__GFX09 = 0x00000003, - TCP_PERF_SEL_TA_TCP_DATA_STARVE_CYCLES__GFX09 = 0x00000004, - TCP_PERF_SEL_TCP_TA_ADDR_STALL_CYCLES__GFX09 = 0x00000005, - TCP_PERF_SEL_TCP_TA_DATA_STALL_CYCLES__GFX09 = 0x00000006, - TCP_PERF_SEL_TD_TCP_STALL_CYCLES__GFX09 = 0x00000007, - TCP_PERF_SEL_TCR_TCP_STALL_CYCLES__GFX09 = 0x00000008, - TCP_PERF_SEL_TCP_TCR_STARVE_CYCLES__GFX09 = 0x00000009, - TCP_PERF_SEL_LOD_STALL_CYCLES__GFX09 = 0x0000000a, - TCP_PERF_SEL_READ_TAGCONFLICT_STALL_CYCLES__GFX09 = 0x0000000b, - TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL_CYCLES__GFX09 = 0x0000000c, - TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL_CYCLES__GFX09 = 0x0000000d, - TCP_PERF_SEL_ALLOC_STALL_CYCLES__GFX09 = 0x0000000e, - TCP_PERF_SEL_UNORDERED_MTYPE_STALL__GFX09 = 0x0000000f, - 
TCP_PERF_SEL_LFIFO_STALL_CYCLES__GFX09 = 0x00000010, - TCP_PERF_SEL_RFIFO_STALL_CYCLES__GFX09 = 0x00000011, - TCP_PERF_SEL_TCR_RDRET_STALL__GFX09 = 0x00000012, - TCP_PERF_SEL_WRITE_CONFLICT_STALL__GFX09 = 0x00000013, - TCP_PERF_SEL_HOLE_READ_STALL__GFX09 = 0x00000014, - TCP_PERF_SEL_READCONFLICT_STALL_CYCLES__GFX09 = 0x00000015, - TCP_PERF_SEL_PENDING_STALL_CYCLES__GFX09 = 0x00000016, - TCP_PERF_SEL_READFIFO_STALL_CYCLES__GFX09 = 0x00000017, - TCP_PERF_SEL_POWER_STALL__GFX09 = 0x00000018, - TCP_PERF_SEL_UTCL1_SERIALIZATION_STALL__GFX09 = 0x00000019, - TCP_PERF_SEL_TC_TA_XNACK_STALL__GFX09 = 0x0000001a, - TCP_PERF_SEL_TA_TCP_STATE_READ__GFX09 = 0x0000001b, - TCP_PERF_SEL_VOLATILE__GFX09 = 0x0000001c, - TCP_PERF_SEL_TOTAL_ACCESSES__GFX09 = 0x0000001d, - TCP_PERF_SEL_TOTAL_READ__GFX09 = 0x0000001e, - TCP_PERF_SEL_TOTAL_NON_READ__GFX09 = 0x0000001f, - TCP_PERF_SEL_TOTAL_WRITE__GFX09 = 0x00000020, - TCP_PERF_SEL_TOTAL_HIT_LRU_READ__GFX09 = 0x00000021, - TCP_PERF_SEL_TOTAL_MISS_LRU_READ__GFX09 = 0x00000022, - TCP_PERF_SEL_TOTAL_MISS_EVICT_READ__GFX09 = 0x00000023, - TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE__GFX09 = 0x00000024, - TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE__GFX09 = 0x00000025, - TCP_PERF_SEL_TOTAL_ATOMIC_WITH_RET__GFX09 = 0x00000026, - TCP_PERF_SEL_TOTAL_ATOMIC_WITHOUT_RET__GFX09 = 0x00000027, - TCP_PERF_SEL_TOTAL_WBINVL1__GFX09 = 0x00000028, - TCP_PERF_SEL_TOTAL_WBINVL1_VOL__GFX09 = 0x00000029, - TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL__GFX09 = 0x0000002a, - TCP_PERF_SEL_CP_TCP_INVALIDATE__GFX09 = 0x0000002b, - TCP_PERF_SEL_CP_TCP_INVALIDATE_VOL__GFX09 = 0x0000002c, - TCP_PERF_SEL_TOTAL_WRITEBACK_INVALIDATES__GFX09 = 0x0000002d, - TCP_PERF_SEL_SHOOTDOWN__GFX09 = 0x0000002e, - TCP_PERF_SEL_UTCL1_REQUEST__GFX09 = 0x0000002f, - TCP_PERF_SEL_UTCL1_TRANSLATION_MISS__GFX09 = 0x00000030, - TCP_PERF_SEL_UTCL1_TRANSLATION_HIT__GFX09 = 0x00000031, - TCP_PERF_SEL_UTCL1_PERMISSION_MISS__GFX09 = 0x00000032, - TCP_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX__GFX09 = 0x00000033, - 
TCP_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT__GFX09 = 0x00000034, - TCP_PERF_SEL_UTCL1_STALL_MULTI_MISS__GFX09 = 0x00000035, - TCP_PERF_SEL_UTCL1_LFIFO_FULL__GFX09 = 0x00000036, - TCP_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES__GFX09 = 0x00000037, - TCP_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS__GFX09 = 0x00000038, - TCP_PERF_SEL_CLIENT_UTCL1_INFLIGHT__GFX09 = 0x00000039, - TCP_PERF_SEL_UTCL1_UTCL2_INFLIGHT__GFX09 = 0x0000003a, - TCP_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL__GFX09 = 0x0000003b, - TCP_PERF_SEL_TOTAL_CACHE_ACCESSES__GFX09 = 0x0000003c, - TCP_PERF_SEL_TAGRAM0_REQ__GFX09 = 0x0000003d, - TCP_PERF_SEL_TAGRAM1_REQ__GFX09 = 0x0000003e, - TCP_PERF_SEL_TAGRAM2_REQ__GFX09 = 0x0000003f, - TCP_PERF_SEL_TAGRAM3_REQ__GFX09 = 0x00000040, - TCP_PERF_SEL_TCP_LATENCY__GFX09 = 0x00000041, - TCP_PERF_SEL_TCC_READ_REQ_LATENCY__GFX09 = 0x00000042, - TCP_PERF_SEL_TCC_WRITE_REQ_LATENCY__GFX09 = 0x00000043, - TCP_PERF_SEL_TCC_WRITE_REQ_HOLE_LATENCY__GFX09 = 0x00000044, - TCP_PERF_SEL_TCC_READ_REQ__GFX09 = 0x00000045, - TCP_PERF_SEL_TCC_WRITE_REQ__GFX09 = 0x00000046, - TCP_PERF_SEL_TCC_ATOMIC_WITH_RET_REQ__GFX09 = 0x00000047, - TCP_PERF_SEL_TCC_ATOMIC_WITHOUT_RET_REQ__GFX09 = 0x00000048, - TCP_PERF_SEL_TCC_LRU_REQ__GFX09 = 0x00000049, - TCP_PERF_SEL_TCC_STREAM_REQ__GFX09 = 0x0000004a, - TCP_PERF_SEL_TCC_NC_READ_REQ__GFX09 = 0x0000004b, - TCP_PERF_SEL_TCC_NC_WRITE_REQ__GFX09 = 0x0000004c, - TCP_PERF_SEL_TCC_NC_ATOMIC_REQ__GFX09 = 0x0000004d, - TCP_PERF_SEL_TCC_UC_READ_REQ__GFX09 = 0x0000004e, - TCP_PERF_SEL_TCC_UC_WRITE_REQ__GFX09 = 0x0000004f, - TCP_PERF_SEL_TCC_UC_ATOMIC_REQ__GFX09 = 0x00000050, - TCP_PERF_SEL_TCC_CC_READ_REQ__GFX09 = 0x00000051, - TCP_PERF_SEL_TCC_CC_WRITE_REQ__GFX09 = 0x00000052, - TCP_PERF_SEL_TCC_CC_ATOMIC_REQ__GFX09 = 0x00000053, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09 = 0x00000054, - TCP_PERF_SEL_REQ_READ_HIT_EVICT__GFX10 = 0x0000000b, - TCP_PERF_SEL_TA_TCP_XNACK_STALL__GFX101 = 0x00000026, - TCP_PERF_SEL_DATA_FIFO_STALL__GFX101 = 0x00000027, - 
TCP_PERF_SEL_LOD_STALL__GFX101 = 0x00000028, - TCP_PERF_SEL_POWER_STALL__GFX101 = 0x00000029, - TCP_PERF_SEL_ALLOC_STALL__GFX101 = 0x0000002a, - TCP_PERF_SEL_UNORDERED_MTYPE_STALL__GFX101 = 0x0000002b, - TCP_PERF_SEL_READ_TAGCONFLICT_STALL__GFX101 = 0x0000002c, - TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL__GFX101 = 0x0000002d, - TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL__GFX101 = 0x0000002e, - TCP_PERF_SEL_LFIFO_STALL__GFX101 = 0x0000002f, - TCP_PERF_SEL_MEM_REQ_FIFO_STALL__GFX101 = 0x00000030, - TCP_PERF_SEL_GL1_TCP_STALL__GFX101 = 0x00000031, - TCP_PERF_SEL_TCP_GL1_STARVE__GFX101 = 0x00000032, - TCP_PERF_SEL_GL1_TCP_RDRET_STALL__GFX101 = 0x00000033, - TCP_PERF_SEL_GL1_GRANT_READ_STALL__GFX101 = 0x00000034, - TCP_PERF_SEL_GL1_PENDING_STALL__GFX101 = 0x00000035, - TCP_PERF_SEL_OFIFO_INCOMPLETE_STALL__GFX101 = 0x00000036, - TCP_PERF_SEL_OFIFO_AGE_ORDER_STALL__GFX101 = 0x00000037, - TCP_PERF_SEL_TD_DATA_CYCLE_STALL__GFX101 = 0x00000038, - TCP_PERF_SEL_COMP_TEX_LOAD_STALL__GFX101 = 0x00000039, - TCP_PERF_SEL_READ_DATACONFLICT_STALL__GFX101 = 0x0000003a, - TCP_PERF_SEL_WRITE_DATACONFLICT_STALL__GFX101 = 0x0000003b, - TCP_PERF_SEL_TD_TCP_STALL__GFX101 = 0x0000003c, - TCP_PERF_SEL_UTCL0_REQUEST__GFX101 = 0x0000003d, - TCP_PERF_SEL_UTCL0_TRANSLATION_MISS__GFX101 = 0x0000003e, - TCP_PERF_SEL_UTCL0_TRANSLATION_HIT__GFX101 = 0x0000003f, - TCP_PERF_SEL_UTCL0_PERMISSION_MISS__GFX101 = 0x00000040, - TCP_PERF_SEL_UTCL0_SERIALIZATION_STALL__GFX101 = 0x00000041, - TCP_PERF_SEL_UTCL0_STALL_INFLIGHT_MAX__GFX101 = 0x00000042, - TCP_PERF_SEL_UTCL0_STALL_LRU_INFLIGHT__GFX101 = 0x00000043, - TCP_PERF_SEL_UTCL0_STALL_MULTI_MISS__GFX101 = 0x00000044, - TCP_PERF_SEL_UTCL0_STALL_LFIFO_FULL__GFX101 = 0x00000045, - TCP_PERF_SEL_UTCL0_STALL_MISSFIFO_FULL__GFX101 = 0x00000046, - TCP_PERF_SEL_UTCL0_STALL_LFIFO_NOT_RES__GFX101 = 0x00000047, - TCP_PERF_SEL_UTCL0_STALL_UTCL1_REQ_OUT_OF_CREDITS__GFX101 = 0x00000048, - TCP_PERF_SEL_CLIENT_UTCL0_INFLIGHT__GFX101 = 0x00000049, - 
TCP_PERF_SEL_UTCL0_UTCL1_INFLIGHT__GFX101 = 0x0000004a, - TCP_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX101 = 0x0000004b, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX101 = 0x0000004c, - TCP_PERF_SEL_DATA_FIFO_STALL__GFX103 = 0x00000026, - TCP_PERF_SEL_LOD_STALL__GFX103 = 0x00000027, - TCP_PERF_SEL_POWER_STALL__GFX103 = 0x00000028, - TCP_PERF_SEL_ALLOC_STALL__GFX103 = 0x00000029, - TCP_PERF_SEL_UNORDERED_MTYPE_STALL__GFX103 = 0x0000002a, - TCP_PERF_SEL_READ_TAGCONFLICT_STALL__GFX103 = 0x0000002b, - TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL__GFX103 = 0x0000002c, - TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL__GFX103 = 0x0000002d, - TCP_PERF_SEL_LFIFO_STALL__GFX103 = 0x0000002e, - TCP_PERF_SEL_MEM_REQ_FIFO_STALL__GFX103 = 0x0000002f, - TCP_PERF_SEL_GL1_TCP_STALL__GFX103 = 0x00000030, - TCP_PERF_SEL_TCP_GL1_STARVE__GFX103 = 0x00000031, - TCP_PERF_SEL_GL1_TCP_RDRET_STALL__GFX103 = 0x00000032, - TCP_PERF_SEL_GL1_GRANT_READ_STALL__GFX103 = 0x00000033, - TCP_PERF_SEL_GL1_PENDING_STALL__GFX103 = 0x00000034, - TCP_PERF_SEL_OFIFO_INCOMPLETE_STALL__GFX103 = 0x00000035, - TCP_PERF_SEL_OFIFO_AGE_ORDER_STALL__GFX103 = 0x00000036, - TCP_PERF_SEL_TD_DATA_CYCLE_STALL__GFX103 = 0x00000037, - TCP_PERF_SEL_COMP_TEX_LOAD_STALL__GFX103 = 0x00000038, - TCP_PERF_SEL_READ_DATACONFLICT_STALL__GFX103 = 0x00000039, - TCP_PERF_SEL_WRITE_DATACONFLICT_STALL__GFX103 = 0x0000003a, - TCP_PERF_SEL_TD_TCP_STALL__GFX103 = 0x0000003b, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103 = 0x0000003c, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - TCP_PERF_SEL_REQ_NON_READ__GFX104PLUS = 0x00000010, - TCP_PERF_SEL_REQ_MISS__GFX104PLUS = 0x00000011, -#endif - TCP_PERF_SEL_REQ_WRITE_MISS_LRU__GFX10CORE = 0x00000010, - TCP_PERF_SEL_REQ_NON_READ__GFX10CORE = 0x00000011, - TCP_PERF_SEL_REQ_MISS__GFX10CORE = 0x00000012, - TCP_PERF_SEL_REQ_TAGBANK0__GFX10CORE = 0x00000013, - TCP_PERF_SEL_REQ_TAGBANK1__GFX10CORE = 0x00000014, - TCP_PERF_SEL_REQ_TAGBANK2__GFX10CORE = 0x00000015, - TCP_PERF_SEL_REQ_TAGBANK3__GFX10CORE 
= 0x00000016, - TCP_PERF_SEL_REQ_MISS_TAGBANK0__GFX10CORE = 0x00000017, - TCP_PERF_SEL_REQ_MISS_TAGBANK1__GFX10CORE = 0x00000018, - TCP_PERF_SEL_REQ_MISS_TAGBANK2__GFX10CORE = 0x00000019, - TCP_PERF_SEL_REQ_MISS_TAGBANK3__GFX10CORE = 0x0000001a, - TCP_PERF_SEL_GL1_REQ_READ__GFX10CORE = 0x0000001b, - TCP_PERF_SEL_GL1_REQ_READ_128B__GFX10CORE = 0x0000001c, - TCP_PERF_SEL_GL1_REQ_READ_64B__GFX10CORE = 0x0000001d, - TCP_PERF_SEL_GL1_REQ_WRITE__GFX10CORE = 0x0000001e, - TCP_PERF_SEL_GL1_REQ_ATOMIC_WITH_RET__GFX10CORE = 0x0000001f, - TCP_PERF_SEL_GL1_REQ_ATOMIC_WITHOUT_RET__GFX10CORE = 0x00000020, - TCP_PERF_SEL_GL1_READ_LATENCY__GFX10CORE = 0x00000021, - TCP_PERF_SEL_GL1_WRITE_LATENCY__GFX10CORE = 0x00000022, - TCP_PERF_SEL_TCP_LATENCY__GFX10CORE = 0x00000023, - TCP_PERF_SEL_TCP_TA_REQ_STALL__GFX10CORE = 0x00000024, - TCP_PERF_SEL_TA_TCP_REQ_STARVE__GFX10CORE = 0x00000025, - TCP_PERF_SEL_TA_REQ__GFX10PLUS = 0x00000002, - TCP_PERF_SEL_TA_REQ_STATE_READ__GFX10PLUS = 0x00000003, - TCP_PERF_SEL_TA_REQ_READ__GFX10PLUS = 0x00000004, - TCP_PERF_SEL_TA_REQ_WRITE__GFX10PLUS = 0x00000005, - TCP_PERF_SEL_TA_REQ_ATOMIC_WITH_RET__GFX10PLUS = 0x00000006, - TCP_PERF_SEL_TA_REQ_ATOMIC_WITHOUT_RET__GFX10PLUS = 0x00000007, - TCP_PERF_SEL_TA_REQ_GL0_INV__GFX10PLUS = 0x00000008, - TCP_PERF_SEL_REQ__GFX10PLUS = 0x00000009, - TCP_PERF_SEL_REQ_READ__GFX10PLUS = 0x0000000a, - TCP_PERF_SEL_REQ_READ_HIT_LRU__GFX10PLUS = 0x0000000c, - TCP_PERF_SEL_REQ_READ_MISS_EVICT__GFX10PLUS = 0x0000000d, - TCP_PERF_SEL_REQ_WRITE__GFX10PLUS = 0x0000000e, - TCP_PERF_SEL_REQ_WRITE_MISS_EVICT__GFX10PLUS = 0x0000000f, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - TCP_PERF_SEL_REQ_TAGBANK0_SET0__GFX11 = 0x00000012, - TCP_PERF_SEL_REQ_TAGBANK0_SET1__GFX11 = 0x00000013, - TCP_PERF_SEL_REQ_TAGBANK1_SET0__GFX11 = 0x00000014, - TCP_PERF_SEL_REQ_TAGBANK1_SET1__GFX11 = 0x00000015, - TCP_PERF_SEL_REQ_TAGBANK2_SET0__GFX11 = 0x00000016, - TCP_PERF_SEL_REQ_TAGBANK2_SET1__GFX11 = 0x00000017, - 
TCP_PERF_SEL_REQ_TAGBANK3_SET0__GFX11 = 0x00000018, - TCP_PERF_SEL_REQ_TAGBANK3_SET1__GFX11 = 0x00000019, - TCP_PERF_SEL_REQ_MISS_TAGBANK0__GFX11 = 0x0000001a, - TCP_PERF_SEL_REQ_MISS_TAGBANK1__GFX11 = 0x0000001b, - TCP_PERF_SEL_REQ_MISS_TAGBANK2__GFX11 = 0x0000001c, - TCP_PERF_SEL_REQ_MISS_TAGBANK3__GFX11 = 0x0000001d, - TCP_PERF_SEL_GL1_REQ_READ__GFX11 = 0x0000001e, - TCP_PERF_SEL_GL1_REQ_READ_128B__GFX11 = 0x0000001f, - TCP_PERF_SEL_GL1_REQ_READ_64B__GFX11 = 0x00000020, - TCP_PERF_SEL_GL1_REQ_WRITE__GFX11 = 0x00000021, - TCP_PERF_SEL_GL1_REQ_ATOMIC_WITH_RET__GFX11 = 0x00000022, - TCP_PERF_SEL_GL1_REQ_ATOMIC_WITHOUT_RET__GFX11 = 0x00000023, - TCP_PERF_SEL_GL1_READ_LATENCY__GFX11 = 0x00000024, - TCP_PERF_SEL_GL1_WRITE_LATENCY__GFX11 = 0x00000025, - TCP_PERF_SEL_TCP_LATENCY__GFX11 = 0x00000026, - TCP_PERF_SEL_TCP_TA_REQ_STALL__GFX11 = 0x00000027, - TCP_PERF_SEL_TA_TCP_REQ_STARVE__GFX11 = 0x00000028, - TCP_PERF_SEL_DATA_FIFO_STALL__GFX11 = 0x00000029, - TCP_PERF_SEL_LOD_STALL__GFX11 = 0x0000002a, - TCP_PERF_SEL_POWER_STALL__GFX11 = 0x0000002b, - TCP_PERF_SEL_ALLOC_STALL__GFX11 = 0x0000002c, - TCP_PERF_SEL_UNORDERED_MTYPE_STALL__GFX11 = 0x0000002d, - TCP_PERF_SEL_READ_TAGCONFLICT_STALL__GFX11 = 0x0000002e, - TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL__GFX11 = 0x0000002f, - TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL__GFX11 = 0x00000030, - TCP_PERF_SEL_LFIFO_STALL__GFX11 = 0x00000031, - TCP_PERF_SEL_MEM_REQ_FIFO_STALL__GFX11 = 0x00000032, - TCP_PERF_SEL_GL1_TCP_BACK_PRESSURE__GFX11 = 0x00000033, - TCP_PERF_SEL_GL1_TCP_RDRET_STALL__GFX11 = 0x00000034, - TCP_PERF_SEL_GL1_GRANT_READ_STALL__GFX11 = 0x00000035, - TCP_PERF_SEL_GL1_PENDING_STALL__GFX11 = 0x00000036, - TCP_PERF_SEL_OFIFO_INCOMPLETE_STALL__GFX11 = 0x00000037, - TCP_PERF_SEL_OFIFO_AGE_ORDER_STALL__GFX11 = 0x00000038, - TCP_PERF_SEL_TD_DATA_CYCLE_STALL__GFX11 = 0x00000039, - TCP_PERF_SEL_COMP_TEX_LOAD_STALL__GFX11 = 0x0000003a, - TCP_PERF_SEL_READ_DATACONFLICT_STALL__GFX11 = 0x0000003b, - 
TCP_PERF_SEL_WRITE_DATACONFLICT_STALL__GFX11 = 0x0000003c, - TCP_PERF_SEL_TD_TCP_STALL__GFX11 = 0x0000003d, - TCP_PERF_SEL_TA_REQ_BUFFERNOP__GFX11 = 0x0000003e, - TCP_PERF_SEL_WRITECOMBINE_ENDCLAUSE__GFX11 = 0x0000003f, - TCP_PERF_SEL_TAGFAKE_EOW__GFX11 = 0x00000040, - TCP_PERF_SEL_REQ_TAG_MATCH_AND_NOT_VALID__GFX11 = 0x00000041, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_0__GFX11 = 0x00000042, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_1to2__GFX11 = 0x00000043, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_3to4__GFX11 = 0x00000044, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_5to8__GFX11 = 0x00000045, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_9to16__GFX11 = 0x00000046, - TCP_PERF_SEL_BURST_BIN_WRITECOMBINE_gt16__GFX11 = 0x00000047, - TCP_PERF_SEL_BURST_BIN_READHIT_0__GFX11 = 0x00000048, - TCP_PERF_SEL_BURST_BIN_READHIT_1__GFX11 = 0x00000049, - TCP_PERF_SEL_BURST_BIN_READHIT_2to4__GFX11 = 0x0000004a, - TCP_PERF_SEL_BURST_BIN_READHIT_5to8__GFX11 = 0x0000004b, - TCP_PERF_SEL_BURST_BIN_READHIT_9to16__GFX11 = 0x0000004c, - TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11 = 0x0000004d, -#endif -} TCP_PERFCOUNT_SELECT; - -constexpr unsigned int MaxTcpPerfcountSelectGfx09 = TCP_PERF_SEL_TCC_DCC_REQ__GFX09; -constexpr unsigned int MaxTcpPerfcountSelectGfx101 = TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX101; -constexpr unsigned int MaxTcpPerfcountSelectGfx103 = TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxTcpPerfcountSelectGfx11 = TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11; -#endif - -typedef enum TCP_WATCH_MODES { - TCP_WATCH_MODE_READ = 0x00000000, - TCP_WATCH_MODE_NONREAD = 0x00000001, - TCP_WATCH_MODE_ATOMIC = 0x00000002, - TCP_WATCH_MODE_ALL = 0x00000003, -} TCP_WATCH_MODES; - -typedef enum TC_EA_CID { - TC_EA_CID_RT = 0x00000000, - TC_EA_CID_FMASK = 0x00000001, - TC_EA_CID_DCC = 0x00000002, - TC_EA_CID_TCPMETA = 0x00000003, - TC_EA_CID_Z = 0x00000004, - TC_EA_CID_STENCIL = 0x00000005, - TC_EA_CID_HTILE = 
0x00000006, - TC_EA_CID_MISC = 0x00000007, - TC_EA_CID_TCP = 0x00000008, - TC_EA_CID_SQC = 0x00000009, - TC_EA_CID_CPF = 0x0000000a, - TC_EA_CID_CPG = 0x0000000b, - TC_EA_CID_IA = 0x0000000c, - TC_EA_CID_WD = 0x0000000d, - TC_EA_CID_PA = 0x0000000e, - TC_EA_CID_UTCL2_TPI = 0x0000000f, -} TC_EA_CID; - -typedef enum TC_MICRO_TILE_MODE { - MICRO_TILE_MODE_LINEAR = 0x00000000, - MICRO_TILE_MODE_STD_3D = 0x00000003, - MICRO_TILE_MODE_DISPLAY_2D = 0x00000004, - MICRO_TILE_MODE_DISPLAY_3D = 0x00000005, - MICRO_TILE_MODE_STD_2D__CORE = 0x00000002, - MICRO_TILE_MODE_ROTATED__GFX09 = 0x00000001, - MICRO_TILE_MODE_Z_2D__GFX09 = 0x00000006, - MICRO_TILE_MODE_Z_3D__GFX09 = 0x00000007, - MICRO_TILE_MODE_Z__GFX10COREPLUS = 0x00000006, - MICRO_TILE_MODE_RENDER_TARGET__GFX10PLUS = 0x00000001, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - MICRO_TILE_MODE_Z_VAR__GFX11 = 0x00000007, -#endif -#if CHIP_HDR_NAVI21 - MICRO_TILE_MODE_Z_VAR__NV21 = 0x00000007, -#endif -#if CHIP_HDR_NAVI22 - MICRO_TILE_MODE_Z_VAR__NV22 = 0x00000007, -#endif -#if CHIP_HDR_NAVI23 - MICRO_TILE_MODE_Z_VAR__NV23 = 0x00000007, -#endif -#if CHIP_HDR_NAVI24 - MICRO_TILE_MODE_Z_VAR__NV24 = 0x00000007, -#endif -} TC_MICRO_TILE_MODE; - -typedef enum TC_NACKS { - TC_NACK_NO_FAULT = 0x00000000, - TC_NACK_PAGE_FAULT = 0x00000001, - TC_NACK_PROTECTION_FAULT = 0x00000002, - TC_NACK_DATA_ERROR = 0x00000003, -} TC_NACKS; - -typedef enum TC_OP { - TC_OP_READ = 0x00000000, - TC_OP_ATOMIC_FCMPSWAP_RTN_32 = 0x00000001, - TC_OP_ATOMIC_FMIN_RTN_32 = 0x00000002, - TC_OP_ATOMIC_FMAX_RTN_32 = 0x00000003, - TC_OP_RESERVED_FOP_RTN_32_0 = 0x00000004, - TC_OP_RESERVED_FOP_RTN_32_2 = 0x00000006, - TC_OP_ATOMIC_SWAP_RTN_32 = 0x00000007, - TC_OP_ATOMIC_CMPSWAP_RTN_32 = 0x00000008, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_32 = 0x00000009, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_32 = 0x0000000a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_32 = 0x0000000b, - TC_OP_PROBE_FILTER = 0x0000000c, - 
TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_2 = 0x0000000e, - TC_OP_ATOMIC_ADD_RTN_32 = 0x0000000f, - TC_OP_ATOMIC_SUB_RTN_32 = 0x00000010, - TC_OP_ATOMIC_SMIN_RTN_32 = 0x00000011, - TC_OP_ATOMIC_UMIN_RTN_32 = 0x00000012, - TC_OP_ATOMIC_SMAX_RTN_32 = 0x00000013, - TC_OP_ATOMIC_UMAX_RTN_32 = 0x00000014, - TC_OP_ATOMIC_AND_RTN_32 = 0x00000015, - TC_OP_ATOMIC_OR_RTN_32 = 0x00000016, - TC_OP_ATOMIC_XOR_RTN_32 = 0x00000017, - TC_OP_ATOMIC_INC_RTN_32 = 0x00000018, - TC_OP_ATOMIC_DEC_RTN_32 = 0x00000019, - TC_OP_WBINVL1_VOL = 0x0000001a, - TC_OP_WBINVL1_SD = 0x0000001b, - TC_OP_RESERVED_NON_FLOAT_RTN_32_0 = 0x0000001c, - TC_OP_RESERVED_NON_FLOAT_RTN_32_1 = 0x0000001d, - TC_OP_RESERVED_NON_FLOAT_RTN_32_2 = 0x0000001e, - TC_OP_RESERVED_NON_FLOAT_RTN_32_3 = 0x0000001f, - TC_OP_WRITE = 0x00000020, - TC_OP_ATOMIC_FCMPSWAP_RTN_64 = 0x00000021, - TC_OP_ATOMIC_FMIN_RTN_64 = 0x00000022, - TC_OP_ATOMIC_FMAX_RTN_64 = 0x00000023, - TC_OP_RESERVED_FOP_RTN_64_0 = 0x00000024, - TC_OP_RESERVED_FOP_RTN_64_1 = 0x00000025, - TC_OP_RESERVED_FOP_RTN_64_2 = 0x00000026, - TC_OP_ATOMIC_SWAP_RTN_64 = 0x00000027, - TC_OP_ATOMIC_CMPSWAP_RTN_64 = 0x00000028, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_64 = 0x00000029, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_64 = 0x0000002a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_64 = 0x0000002b, - TC_OP_WBINVL2_SD = 0x0000002c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_0 = 0x0000002d, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_1 = 0x0000002e, - TC_OP_ATOMIC_ADD_RTN_64 = 0x0000002f, - TC_OP_ATOMIC_SUB_RTN_64 = 0x00000030, - TC_OP_ATOMIC_SMIN_RTN_64 = 0x00000031, - TC_OP_ATOMIC_UMIN_RTN_64 = 0x00000032, - TC_OP_ATOMIC_SMAX_RTN_64 = 0x00000033, - TC_OP_ATOMIC_UMAX_RTN_64 = 0x00000034, - TC_OP_ATOMIC_AND_RTN_64 = 0x00000035, - TC_OP_ATOMIC_OR_RTN_64 = 0x00000036, - TC_OP_ATOMIC_XOR_RTN_64 = 0x00000037, - TC_OP_ATOMIC_INC_RTN_64 = 0x00000038, - TC_OP_ATOMIC_DEC_RTN_64 = 0x00000039, - TC_OP_WBL2_NC = 0x0000003a, - TC_OP_WBL2_WC = 0x0000003b, - TC_OP_RESERVED_NON_FLOAT_RTN_64_1 = 
0x0000003c, - TC_OP_RESERVED_NON_FLOAT_RTN_64_2 = 0x0000003d, - TC_OP_RESERVED_NON_FLOAT_RTN_64_3 = 0x0000003e, - TC_OP_RESERVED_NON_FLOAT_RTN_64_4 = 0x0000003f, - TC_OP_WBINVL1 = 0x00000040, - TC_OP_ATOMIC_FCMPSWAP_32 = 0x00000041, - TC_OP_ATOMIC_FMIN_32 = 0x00000042, - TC_OP_ATOMIC_FMAX_32 = 0x00000043, - TC_OP_RESERVED_FOP_32_0 = 0x00000044, - TC_OP_RESERVED_FOP_32_2 = 0x00000046, - TC_OP_ATOMIC_SWAP_32 = 0x00000047, - TC_OP_ATOMIC_CMPSWAP_32 = 0x00000048, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_32 = 0x00000049, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_32 = 0x0000004a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_32 = 0x0000004b, - TC_OP_INV_METADATA = 0x0000004c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_32_2 = 0x0000004e, - TC_OP_ATOMIC_ADD_32 = 0x0000004f, - TC_OP_ATOMIC_SUB_32 = 0x00000050, - TC_OP_ATOMIC_SMIN_32 = 0x00000051, - TC_OP_ATOMIC_UMIN_32 = 0x00000052, - TC_OP_ATOMIC_SMAX_32 = 0x00000053, - TC_OP_ATOMIC_UMAX_32 = 0x00000054, - TC_OP_ATOMIC_AND_32 = 0x00000055, - TC_OP_ATOMIC_OR_32 = 0x00000056, - TC_OP_ATOMIC_XOR_32 = 0x00000057, - TC_OP_ATOMIC_INC_32 = 0x00000058, - TC_OP_ATOMIC_DEC_32 = 0x00000059, - TC_OP_INVL2_NC = 0x0000005a, - TC_OP_NOP_RTN0 = 0x0000005b, - TC_OP_RESERVED_NON_FLOAT_32_1 = 0x0000005c, - TC_OP_RESERVED_NON_FLOAT_32_2 = 0x0000005d, - TC_OP_RESERVED_NON_FLOAT_32_3 = 0x0000005e, - TC_OP_RESERVED_NON_FLOAT_32_4 = 0x0000005f, - TC_OP_WBINVL2 = 0x00000060, - TC_OP_ATOMIC_FCMPSWAP_64 = 0x00000061, - TC_OP_ATOMIC_FMIN_64 = 0x00000062, - TC_OP_ATOMIC_FMAX_64 = 0x00000063, - TC_OP_RESERVED_FOP_64_0 = 0x00000064, - TC_OP_RESERVED_FOP_64_1 = 0x00000065, - TC_OP_RESERVED_FOP_64_2 = 0x00000066, - TC_OP_ATOMIC_SWAP_64 = 0x00000067, - TC_OP_ATOMIC_CMPSWAP_64 = 0x00000068, - TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_64 = 0x00000069, - TC_OP_ATOMIC_FMIN_FLUSH_DENORM_64 = 0x0000006a, - TC_OP_ATOMIC_FMAX_FLUSH_DENORM_64 = 0x0000006b, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_0 = 0x0000006c, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_1 = 0x0000006d, - TC_OP_RESERVED_FOP_FLUSH_DENORM_64_2 = 
0x0000006e, - TC_OP_ATOMIC_ADD_64 = 0x0000006f, - TC_OP_ATOMIC_SUB_64 = 0x00000070, - TC_OP_ATOMIC_SMIN_64 = 0x00000071, - TC_OP_ATOMIC_UMIN_64 = 0x00000072, - TC_OP_ATOMIC_SMAX_64 = 0x00000073, - TC_OP_ATOMIC_UMAX_64 = 0x00000074, - TC_OP_ATOMIC_AND_64 = 0x00000075, - TC_OP_ATOMIC_OR_64 = 0x00000076, - TC_OP_ATOMIC_XOR_64 = 0x00000077, - TC_OP_ATOMIC_INC_64 = 0x00000078, - TC_OP_ATOMIC_DEC_64 = 0x00000079, - TC_OP_WBINVL2_NC = 0x0000007a, - TC_OP_NOP_ACK = 0x0000007b, - TC_OP_RESERVED_NON_FLOAT_64_1 = 0x0000007c, - TC_OP_RESERVED_NON_FLOAT_64_2 = 0x0000007d, - TC_OP_RESERVED_NON_FLOAT_64_3 = 0x0000007e, - TC_OP_RESERVED_NON_FLOAT_64_4 = 0x0000007f, - TC_OP_RESERVED_FOP_RTN_32_1__GFX09_10 = 0x00000005, - TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_1__GFX09_10 = 0x0000000d, - TC_OP_RESERVED_FOP_32_1__GFX09_10 = 0x00000045, - TC_OP_RESERVED_FOP_FLUSH_DENORM_32_1__GFX09_10 = 0x0000004d, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - TC_OP_RESERVED_FADD_RTN_32__GFX11 = 0x00000005, - TC_OP_ATOMIC_FADD_FLUSH_DENORM_RTN_32__GFX11 = 0x0000000d, - TC_OP_RESERVED_FADD_32__GFX11 = 0x00000045, - TC_OP_ATOMIC_FADD_FLUSH_DENORM_32__GFX11 = 0x0000004d, -#endif -} TC_OP; - -typedef enum TC_OP_MASKS { - TC_OP_MASK_FLUSH_DENROM = 0x00000008, - TC_OP_MASK_64 = 0x00000020, - TC_OP_MASK_NO_RTN = 0x00000040, -} TC_OP_MASKS; - -typedef enum TD_PERFCOUNT_SEL { - TD_PERF_SEL_none = 0x00000000, - TD_PERF_SEL_td_busy = 0x00000001, - TD_PERF_SEL_input_busy = 0x00000002, - TD_PERF_SEL_output_busy__GFX09 = 0x00000003, - TD_PERF_SEL_lerp_busy__GFX09 = 0x00000004, - TD_PERF_SEL_reg_sclk_vld__GFX09 = 0x00000005, - TD_PERF_SEL_local_cg_dyn_sclk_grp0_en__GFX09 = 0x00000006, - TD_PERF_SEL_local_cg_dyn_sclk_grp1_en__GFX09 = 0x00000007, - TD_PERF_SEL_local_cg_dyn_sclk_grp4_en__GFX09 = 0x00000008, - TD_PERF_SEL_local_cg_dyn_sclk_grp5_en__GFX09 = 0x00000009, - TD_PERF_SEL_tc_td_fifo_full__GFX09 = 0x0000000a, - TD_PERF_SEL_output_fifo_full__GFX09 = 0x0000000b, - 
TD_PERF_SEL_RESERVED_12__GFX09 = 0x0000000c, - TD_PERF_SEL_RESERVED_13__GFX09 = 0x0000000d, - TD_PERF_SEL_RESERVED_14__GFX09 = 0x0000000e, - TD_PERF_SEL_tc_stall__GFX09 = 0x0000000f, - TD_PERF_SEL_pc_stall__GFX09 = 0x00000010, - TD_PERF_SEL_gds_stall__GFX09 = 0x00000011, - TD_PERF_SEL_RESERVED_18__GFX09 = 0x00000012, - TD_PERF_SEL_RESERVED_19__GFX09 = 0x00000013, - TD_PERF_SEL_gather4_wavefront__GFX09 = 0x00000014, - TD_PERF_SEL_gather4h_wavefront__GFX09 = 0x00000015, - TD_PERF_SEL_gather4h_packed_wavefront__GFX09 = 0x00000016, - TD_PERF_SEL_gather8h_packed_wavefront__GFX09 = 0x00000017, - TD_PERF_SEL_sample_c_wavefront__GFX09 = 0x00000018, - TD_PERF_SEL_load_wavefront__GFX09 = 0x00000019, - TD_PERF_SEL_atomic_wavefront__GFX09 = 0x0000001a, - TD_PERF_SEL_store_wavefront__GFX09 = 0x0000001b, - TD_PERF_SEL_ldfptr_wavefront__GFX09 = 0x0000001c, - TD_PERF_SEL_d16_en_wavefront__GFX09 = 0x0000001d, - TD_PERF_SEL_bypass_filter_wavefront__GFX09 = 0x0000001e, - TD_PERF_SEL_min_max_filter_wavefront__GFX09 = 0x0000001f, - TD_PERF_SEL_coalescable_wavefront__GFX09 = 0x00000020, - TD_PERF_SEL_coalesced_phase__GFX09 = 0x00000021, - TD_PERF_SEL_four_phase_wavefront__GFX09 = 0x00000022, - TD_PERF_SEL_eight_phase_wavefront__GFX09 = 0x00000023, - TD_PERF_SEL_sixteen_phase_wavefront__GFX09 = 0x00000024, - TD_PERF_SEL_four_phase_forward_wavefront__GFX09 = 0x00000025, - TD_PERF_SEL_write_ack_wavefront__GFX09 = 0x00000026, - TD_PERF_SEL_RESERVED_39__GFX09 = 0x00000027, - TD_PERF_SEL_user_defined_border__GFX09 = 0x00000028, - TD_PERF_SEL_white_border__GFX09 = 0x00000029, - TD_PERF_SEL_opaque_black_border__GFX09 = 0x0000002a, - TD_PERF_SEL_RESERVED_43__GFX09 = 0x0000002b, - TD_PERF_SEL_RESERVED_44__GFX09 = 0x0000002c, - TD_PERF_SEL_nack__GFX09 = 0x0000002d, - TD_PERF_SEL_td_sp_traffic__GFX09 = 0x0000002e, - TD_PERF_SEL_consume_gds_traffic__GFX09 = 0x0000002f, - TD_PERF_SEL_addresscmd_poison__GFX09 = 0x00000030, - TD_PERF_SEL_data_poison__GFX09 = 0x00000031, - 
TD_PERF_SEL_start_cycle_0__GFX09 = 0x00000032, - TD_PERF_SEL_start_cycle_1__GFX09 = 0x00000033, - TD_PERF_SEL_start_cycle_2__GFX09 = 0x00000034, - TD_PERF_SEL_start_cycle_3__GFX09 = 0x00000035, - TD_PERF_SEL_null_cycle_output__GFX09 = 0x00000036, - TD_PERF_SEL_d16_data_packed__GFX09 = 0x00000037, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09 = 0x00000038, - TD_PERF_SEL_sampler_sclk_on_nofilter_sclk_off__GFX101 = 0x00000006, - TD_PERF_SEL_nofilter_sclk_on_sampler_sclk_off__GFX101 = 0x00000007, - TD_PERF_SEL_core_state_ram_max_cnt__GFX101 = 0x00000008, - TD_PERF_SEL_core_state_rams_read__GFX101 = 0x00000009, - TD_PERF_SEL_weight_data_rams_read__GFX101 = 0x0000000a, - TD_PERF_SEL_reference_data_rams_read__GFX101 = 0x0000000b, - TD_PERF_SEL_tc_td_ram_fifo_full__GFX101 = 0x0000000c, - TD_PERF_SEL_tc_td_ram_fifo_max_cnt__GFX101 = 0x0000000d, - TD_PERF_SEL_tc_td_data_fifo_full__GFX101 = 0x0000000e, - TD_PERF_SEL_input_state_fifo_full__GFX101 = 0x0000000f, - TD_PERF_SEL_ta_data_stall__GFX101 = 0x00000010, - TD_PERF_SEL_tc_data_stall__GFX101 = 0x00000011, - TD_PERF_SEL_tc_ram_stall__GFX101 = 0x00000012, - TD_PERF_SEL_lds_stall__GFX101 = 0x00000013, - TD_PERF_SEL_sampler_pkr_full__GFX101 = 0x00000014, - TD_PERF_SEL_nofilter_pkr_full__GFX101 = 0x00000015, - TD_PERF_SEL_gather4_wavefront__GFX101 = 0x00000016, - TD_PERF_SEL_gather4h_wavefront__GFX101 = 0x00000017, - TD_PERF_SEL_gather4h_packed_wavefront__GFX101 = 0x00000018, - TD_PERF_SEL_gather8h_packed_wavefront__GFX101 = 0x00000019, - TD_PERF_SEL_sample_c_wavefront__GFX101 = 0x0000001a, - TD_PERF_SEL_load_wavefront__GFX101 = 0x0000001b, - TD_PERF_SEL_ldfptr_wavefront__GFX101 = 0x0000001c, - TD_PERF_SEL_RESERVED_29__GFX101 = 0x0000001d, - TD_PERF_SEL_write_ack_wavefront__GFX101 = 0x0000001e, - TD_PERF_SEL_d16_en_wavefront__GFX101 = 0x0000001f, - TD_PERF_SEL_bypassLerp_wavefront__GFX101 = 0x00000020, - TD_PERF_SEL_min_max_filter_wavefront__GFX101 = 0x00000021, - TD_PERF_SEL_one_comp_wavefront__GFX101 = 0x00000022, - 
TD_PERF_SEL_two_comp_wavefront__GFX101 = 0x00000023, - TD_PERF_SEL_three_comp_wavefront__GFX101 = 0x00000024, - TD_PERF_SEL_four_comp_wavefront__GFX101 = 0x00000025, - TD_PERF_SEL_user_defined_border__GFX101 = 0x00000026, - TD_PERF_SEL_white_border__GFX101 = 0x00000027, - TD_PERF_SEL_opaque_black_border__GFX101 = 0x00000028, - TD_PERF_SEL_lod_warn_from_ta__GFX101 = 0x00000029, - TD_PERF_SEL_wavefront_dest_is_lds__GFX101 = 0x0000002a, - TD_PERF_SEL_td_cycling_of_nofilter_instr__GFX101 = 0x0000002b, - TD_PERF_SEL_tc_cycling_of_nofilter_instr__GFX101 = 0x0000002c, - TD_PERF_SEL_out_of_order_instr__GFX101 = 0x0000002d, - TD_PERF_SEL_total_num_instr__GFX101 = 0x0000002e, - TD_PERF_SEL_mixmode_instruction__GFX101 = 0x0000002f, - TD_PERF_SEL_mixmode_resource__GFX101 = 0x00000030, - TD_PERF_SEL_status_packet__GFX101 = 0x00000031, - TD_PERF_SEL_address_cmd_poison__GFX101 = 0x00000032, - TD_PERF_SEL_data_poison__GFX101 = 0x00000033, - TD_PERF_SEL_done_scoreboard_max_stored_cnt__GFX101 = 0x00000034, - TD_PERF_SEL_done_scoreboard_max_waiting_cnt__GFX101 = 0x00000035, - TD_PERF_SEL_done_scoreboard_not_empty__GFX101 = 0x00000036, - TD_PERF_SEL_done_scoreboard_is_full__GFX101 = 0x00000037, - TD_PERF_SEL_done_scoreboard_bp_due_to_ooo__GFX101 = 0x00000038, - TD_PERF_SEL_done_scoreboard_bp_due_to_lds__GFX101 = 0x00000039, - TD_PERF_SEL_nofilter_formatters_turned_on__GFX101 = 0x0000003a, - TD_PERF_SEL_nofilter_popcount_dmask_gt_num_comp_of_fmt__GFX101 = 0x0000003b, - TD_PERF_SEL_nofilter_popcount_dmask_lt_num_comp_of_fmt__GFX101 = 0x0000003c, - TD_PERF_SEL_sampler_bilerp_sclk_en__GFX103 = 0x00000008, - TD_PERF_SEL_sampler_bypass_sclk_en__GFX103 = 0x00000009, - TD_PERF_SEL_sampler_minmax_sclk_en__GFX103 = 0x0000000a, - TD_PERF_SEL_sampler_accum_sclk_en__GFX103 = 0x0000000b, - TD_PERF_SEL_sampler_format_flt_sclk_en__GFX103 = 0x0000000c, - TD_PERF_SEL_sampler_format_fxdpt_sclk_en__GFX103 = 0x0000000d, - TD_PERF_SEL_sampler_out_sclk_en__GFX103 = 0x0000000e, - 
TD_PERF_SEL_nofilter_d16_sclk_en__GFX103 = 0x00000010, - TD_PERF_SEL_RESERVED_17__GFX103 = 0x00000011, - TD_PERF_SEL_RESERVED_22__GFX103 = 0x00000016, - TD_PERF_SEL_ray_tracing_bvh4_sclk_en__GFX103 = 0x00000017, - TD_PERF_SEL_gather4h_packed_instr__GFX103 = 0x00000034, - TD_PERF_SEL_gather8h_packed_instr__GFX103 = 0x00000035, - TD_PERF_SEL_address_cmd_poison__GFX103 = 0x00000057, - TD_PERF_SEL_data_poison__GFX103 = 0x00000058, - TD_PERF_SEL_core_state_ram_max_cnt__GFX103COREPLUS = 0x00000020, - TD_PERF_SEL_core_state_rams_read__GFX103COREPLUS = 0x00000021, - TD_PERF_SEL_gather4h_instr__GFX103COREPLUS = 0x00000033, - TD_PERF_SEL_mixmode_instr__GFX103COREPLUS = 0x00000054, - TD_PERF_SEL_mixmode_resource__GFX103COREPLUS = 0x00000055, - TD_PERF_SEL_nofilter_byte_cycling_4cycles__GFX103COREPLUS = 0x000000ba, - TD_PERF_SEL_nofilter_byte_cycling_8cycles__GFX103COREPLUS = 0x000000bb, - TD_PERF_SEL_nofilter_byte_cycling_16cycles__GFX103COREPLUS = 0x000000bc, - TD_PERF_SEL_nofilter_sclk_en__GFX103DERIVATIVE = 0x0000000f, - TD_PERF_SEL_RESERVED_18__GFX103DERIVATIVE = 0x00000012, - TD_PERF_SEL_RESERVED_19__GFX103DERIVATIVE = 0x00000013, - TD_PERF_SEL_RESERVED_20__GFX103DERIVATIVE = 0x00000014, - TD_PERF_SEL_RESERVED_21__GFX103DERIVATIVE = 0x00000015, - TD_PERF_SEL_ldfptr_instr__GFX103DERIVATIVE = 0x00000039, - TD_PERF_SEL_ray_tracing_bvh4_busy__GFX103PLUSEXCLUSIVE = 0x00000006, - TD_PERF_SEL_sampler_core_sclk_en__GFX103PLUSEXCLUSIVE = 0x00000007, - TD_PERF_SEL_ray_tracing_bvh4_box_sclk_en__GFX103PLUSEXCLUSIVE = 0x00000018, - TD_PERF_SEL_ray_tracing_bvh4_tri_sclk_en__GFX103PLUSEXCLUSIVE = 0x00000019, - TD_PERF_SEL_sampler_sclk_on_nofilter_sclk_off__GFX103PLUSEXCLUSIVE = 0x0000001a, - TD_PERF_SEL_nofilter_sclk_on_sampler_sclk_off__GFX103PLUSEXCLUSIVE = 0x0000001b, - TD_PERF_SEL_all_pipes_sclk_on_at_same_time__GFX103PLUSEXCLUSIVE = 0x0000001c, - TD_PERF_SEL_sampler_and_nofilter_sclk_on_bvh4_sclk_off__GFX103PLUSEXCLUSIVE = 0x0000001d, - 
TD_PERF_SEL_sampler_and_bvh4_sclk_on_nofilter_sclk_off__GFX103PLUSEXCLUSIVE = 0x0000001e, - TD_PERF_SEL_nofilter_and_bvh4_sclk_on_sampler_sclk_off__GFX103PLUSEXCLUSIVE = 0x0000001f, - TD_PERF_SEL_weight_data_rams_read__GFX103PLUSEXCLUSIVE = 0x00000022, - TD_PERF_SEL_reference_data_rams_read__GFX103PLUSEXCLUSIVE = 0x00000023, - TD_PERF_SEL_tc_td_ram_fifo_full__GFX103PLUSEXCLUSIVE = 0x00000024, - TD_PERF_SEL_tc_td_ram_fifo_max_cnt__GFX103PLUSEXCLUSIVE = 0x00000025, - TD_PERF_SEL_tc_td_data_fifo_full__GFX103PLUSEXCLUSIVE = 0x00000026, - TD_PERF_SEL_input_state_fifo_full__GFX103PLUSEXCLUSIVE = 0x00000027, - TD_PERF_SEL_ta_data_stall__GFX103PLUSEXCLUSIVE = 0x00000028, - TD_PERF_SEL_tc_data_stall__GFX103PLUSEXCLUSIVE = 0x00000029, - TD_PERF_SEL_tc_ram_stall__GFX103PLUSEXCLUSIVE = 0x0000002a, - TD_PERF_SEL_lds_stall__GFX103PLUSEXCLUSIVE = 0x0000002b, - TD_PERF_SEL_sampler_pkr_full__GFX103PLUSEXCLUSIVE = 0x0000002c, - TD_PERF_SEL_sampler_pkr_full_due_to_arb__GFX103PLUSEXCLUSIVE = 0x0000002d, - TD_PERF_SEL_nofilter_pkr_full__GFX103PLUSEXCLUSIVE = 0x0000002e, - TD_PERF_SEL_nofilter_pkr_full_due_to_arb__GFX103PLUSEXCLUSIVE = 0x0000002f, - TD_PERF_SEL_ray_tracing_bvh4_pkr_full__GFX103PLUSEXCLUSIVE = 0x00000030, - TD_PERF_SEL_ray_tracing_bvh4_pkr_full_due_to_arb__GFX103PLUSEXCLUSIVE = 0x00000031, - TD_PERF_SEL_gather4_instr__GFX103PLUSEXCLUSIVE = 0x00000032, - TD_PERF_SEL_sample_instr__GFX103PLUSEXCLUSIVE = 0x00000036, - TD_PERF_SEL_sample_c_instr__GFX103PLUSEXCLUSIVE = 0x00000037, - TD_PERF_SEL_load_instr__GFX103PLUSEXCLUSIVE = 0x00000038, - TD_PERF_SEL_write_ack_instr__GFX103PLUSEXCLUSIVE = 0x0000003a, - TD_PERF_SEL_d16_en_instr__GFX103PLUSEXCLUSIVE = 0x0000003b, - TD_PERF_SEL_bypassLerp_instr__GFX103PLUSEXCLUSIVE = 0x0000003c, - TD_PERF_SEL_min_max_filter_instr__GFX103PLUSEXCLUSIVE = 0x0000003d, - TD_PERF_SEL_one_comp_return_instr__GFX103PLUSEXCLUSIVE = 0x0000003e, - TD_PERF_SEL_two_comp_return_instr__GFX103PLUSEXCLUSIVE = 0x0000003f, - 
TD_PERF_SEL_three_comp_return_instr__GFX103PLUSEXCLUSIVE = 0x00000040, - TD_PERF_SEL_four_comp_return_instr__GFX103PLUSEXCLUSIVE = 0x00000041, - TD_PERF_SEL_user_defined_border__GFX103PLUSEXCLUSIVE = 0x00000042, - TD_PERF_SEL_white_border__GFX103PLUSEXCLUSIVE = 0x00000043, - TD_PERF_SEL_opaque_black_border__GFX103PLUSEXCLUSIVE = 0x00000044, - TD_PERF_SEL_lod_warn_from_ta__GFX103PLUSEXCLUSIVE = 0x00000045, - TD_PERF_SEL_instruction_dest_is_lds__GFX103PLUSEXCLUSIVE = 0x00000046, - TD_PERF_SEL_td_cycling_of_nofilter_instr_2cycles__GFX103PLUSEXCLUSIVE = 0x00000047, - TD_PERF_SEL_td_cycling_of_nofilter_instr_4cycles__GFX103PLUSEXCLUSIVE = 0x00000048, - TD_PERF_SEL_tc_cycling_of_nofilter_instr_2cycles__GFX103PLUSEXCLUSIVE = 0x00000049, - TD_PERF_SEL_tc_cycling_of_nofilter_instr_4cycles__GFX103PLUSEXCLUSIVE = 0x0000004a, - TD_PERF_SEL_out_of_order_instr__GFX103PLUSEXCLUSIVE = 0x0000004b, - TD_PERF_SEL_total_num_instr__GFX103PLUSEXCLUSIVE = 0x0000004c, - TD_PERF_SEL_total_num_instr_with_perf_wdw__GFX103PLUSEXCLUSIVE = 0x0000004d, - TD_PERF_SEL_total_num_sampler_instr__GFX103PLUSEXCLUSIVE = 0x0000004e, - TD_PERF_SEL_total_num_sampler_instr_with_perf_wdw__GFX103PLUSEXCLUSIVE = 0x0000004f, - TD_PERF_SEL_total_num_nofilter_instr__GFX103PLUSEXCLUSIVE = 0x00000050, - TD_PERF_SEL_total_num_nofilter_instr_with_perf_wdw__GFX103PLUSEXCLUSIVE = 0x00000051, - TD_PERF_SEL_total_num_ray_tracing_bvh4_instr__GFX103PLUSEXCLUSIVE = 0x00000052, - TD_PERF_SEL_total_num_ray_tracing_bvh4_instr_with_perf_wdw__GFX103PLUSEXCLUSIVE = 0x00000053, - TD_PERF_SEL_status_packet__GFX103PLUSEXCLUSIVE = 0x00000056, - TD_PERF_SEL_done_scoreboard_max_stored_cnt__GFX103PLUSEXCLUSIVE = 0x00000059, - TD_PERF_SEL_done_scoreboard_max_waiting_cnt__GFX103PLUSEXCLUSIVE = 0x0000005a, - TD_PERF_SEL_done_scoreboard_not_empty__GFX103PLUSEXCLUSIVE = 0x0000005b, - TD_PERF_SEL_done_scoreboard_is_full__GFX103PLUSEXCLUSIVE = 0x0000005c, - TD_PERF_SEL_done_scoreboard_bp_due_to_ooo__GFX103PLUSEXCLUSIVE = 0x0000005d, - 
TD_PERF_SEL_done_scoreboard_bp_due_to_lds__GFX103PLUSEXCLUSIVE = 0x0000005e, - TD_PERF_SEL_nofilter_formatters_turned_on__GFX103PLUSEXCLUSIVE = 0x0000005f, - TD_PERF_SEL_nofilter_insert_extra_comps__GFX103PLUSEXCLUSIVE = 0x00000060, - TD_PERF_SEL_nofilter_popcount_dmask_gt_num_comp_of_fmt__GFX103PLUSEXCLUSIVE = 0x00000061, - TD_PERF_SEL_nofilter_popcount_dmask_lt_num_comp_of_fmt__GFX103PLUSEXCLUSIVE = 0x00000062, - TD_PERF_SEL_msaa_load_instr__GFX103PLUSEXCLUSIVE = 0x00000063, - TD_PERF_SEL_blend_prt_with_prt_default_0__GFX103PLUSEXCLUSIVE = 0x00000064, - TD_PERF_SEL_blend_prt_with_prt_default_1__GFX103PLUSEXCLUSIVE = 0x00000065, - TD_PERF_SEL_resmap_instr__GFX103PLUSEXCLUSIVE = 0x00000066, - TD_PERF_SEL_prt_ack_instr__GFX103PLUSEXCLUSIVE = 0x00000067, - TD_PERF_SEL_resmap_with_volume_filtering__GFX103PLUSEXCLUSIVE = 0x00000068, - TD_PERF_SEL_resmap_with_aniso_filtering__GFX103PLUSEXCLUSIVE = 0x00000069, - TD_PERF_SEL_resmap_with_no_more_filtering__GFX103PLUSEXCLUSIVE = 0x0000006a, - TD_PERF_SEL_resmap_with_cubemap_corner__GFX103PLUSEXCLUSIVE = 0x0000006b, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_0__GFX103PLUSEXCLUSIVE = 0x0000006c, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_1__GFX103PLUSEXCLUSIVE = 0x0000006d, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_2__GFX103PLUSEXCLUSIVE = 0x0000006e, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_3to4__GFX103PLUSEXCLUSIVE = 0x0000006f, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_5to8__GFX103PLUSEXCLUSIVE = 0x00000070, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_9to16__GFX103PLUSEXCLUSIVE = 0x00000071, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_17to31__GFX103PLUSEXCLUSIVE = 0x00000072, - TD_PERF_SEL_ray_tracing_bvh4_threads_per_instruction_is_32__GFX103PLUSEXCLUSIVE = 0x00000073, - TD_PERF_SEL_ray_tracing_bvh4_fp16_box_node__GFX103PLUSEXCLUSIVE = 0x00000074, - TD_PERF_SEL_ray_tracing_bvh4_fp32_box_node__GFX103PLUSEXCLUSIVE = 
0x00000075, - TD_PERF_SEL_ray_tracing_bvh4_tri_node__GFX103PLUSEXCLUSIVE = 0x00000076, - TD_PERF_SEL_ray_tracing_bvh4_dropped_box_node__GFX103PLUSEXCLUSIVE = 0x00000077, - TD_PERF_SEL_ray_tracing_bvh4_dropped_tri_node__GFX103PLUSEXCLUSIVE = 0x00000078, - TD_PERF_SEL_ray_tracing_bvh4_invalid_box_node__GFX103PLUSEXCLUSIVE = 0x00000079, - TD_PERF_SEL_ray_tracing_bvh4_invalid_tri_node__GFX103PLUSEXCLUSIVE = 0x0000007a, - TD_PERF_SEL_ray_tracing_bvh4_box_sort_en__GFX103PLUSEXCLUSIVE = 0x0000007b, - TD_PERF_SEL_ray_tracing_bvh4_box_grow_val_nonzero__GFX103PLUSEXCLUSIVE = 0x0000007c, - TD_PERF_SEL_ray_tracing_bvh4_num_box_with_inf_or_nan_vtx__GFX103PLUSEXCLUSIVE = 0x0000007d, - TD_PERF_SEL_ray_tracing_bvh4_num_tri_with_inf_or_nan_vtx__GFX103PLUSEXCLUSIVE = 0x0000007e, - TD_PERF_SEL_ray_tracing_bvh4_num_box_that_squashed_a_nan__GFX103PLUSEXCLUSIVE = 0x0000007f, - TD_PERF_SEL_ray_tracing_bvh4_num_box_misses__GFX103PLUSEXCLUSIVE = 0x00000080, - TD_PERF_SEL_ray_tracing_bvh4_num_tri_misses__GFX103PLUSEXCLUSIVE = 0x00000081, - TD_PERF_SEL_ray_tracing_bvh4_num_tri_tie_breakers__GFX103PLUSEXCLUSIVE = 0x00000082, - TD_PERF_SEL_burst_bin_preempting_nofilter_1__GFX103PLUSEXCLUSIVE = 0x00000083, - TD_PERF_SEL_burst_bin_preempting_nofilter_2to4__GFX103PLUSEXCLUSIVE = 0x00000084, - TD_PERF_SEL_burst_bin_preempting_nofilter_5to7__GFX103PLUSEXCLUSIVE = 0x00000085, - TD_PERF_SEL_burst_bin_preempting_nofilter_8to16__GFX103PLUSEXCLUSIVE = 0x00000086, - TD_PERF_SEL_burst_bin_preempting_nofilter_gt16__GFX103PLUSEXCLUSIVE = 0x00000087, - TD_PERF_SEL_burst_bin_sampler_1__GFX103PLUSEXCLUSIVE = 0x00000088, - TD_PERF_SEL_burst_bin_sampler_2to8__GFX103PLUSEXCLUSIVE = 0x00000089, - TD_PERF_SEL_burst_bin_sampler_9to16__GFX103PLUSEXCLUSIVE = 0x0000008a, - TD_PERF_SEL_burst_bin_sampler_gt16__GFX103PLUSEXCLUSIVE = 0x0000008b, - TD_PERF_SEL_burst_bin_gather_1__GFX103PLUSEXCLUSIVE = 0x0000008c, - TD_PERF_SEL_burst_bin_gather_2to8__GFX103PLUSEXCLUSIVE = 0x0000008d, - 
TD_PERF_SEL_burst_bin_gather_9to16__GFX103PLUSEXCLUSIVE = 0x0000008e, - TD_PERF_SEL_burst_bin_gather_gt16__GFX103PLUSEXCLUSIVE = 0x0000008f, - TD_PERF_SEL_burst_bin_nofilter_1__GFX103PLUSEXCLUSIVE = 0x00000090, - TD_PERF_SEL_burst_bin_nofilter_2to4__GFX103PLUSEXCLUSIVE = 0x00000091, - TD_PERF_SEL_burst_bin_nofilter_5to7__GFX103PLUSEXCLUSIVE = 0x00000092, - TD_PERF_SEL_burst_bin_nofilter_8to16__GFX103PLUSEXCLUSIVE = 0x00000093, - TD_PERF_SEL_burst_bin_nofilter_gt16__GFX103PLUSEXCLUSIVE = 0x00000094, - TD_PERF_SEL_burst_bin_bvh4_1__GFX103PLUSEXCLUSIVE = 0x00000095, - TD_PERF_SEL_burst_bin_bvh4_2to8__GFX103PLUSEXCLUSIVE = 0x00000096, - TD_PERF_SEL_burst_bin_bvh4_9to16__GFX103PLUSEXCLUSIVE = 0x00000097, - TD_PERF_SEL_burst_bin_bvh4_gt16__GFX103PLUSEXCLUSIVE = 0x00000098, - TD_PERF_SEL_burst_bin_bvh4_box_nodes_1__GFX103PLUSEXCLUSIVE = 0x00000099, - TD_PERF_SEL_burst_bin_bvh4_box_nodes_2to4__GFX103PLUSEXCLUSIVE = 0x0000009a, - TD_PERF_SEL_burst_bin_bvh4_box_nodes_5to7__GFX103PLUSEXCLUSIVE = 0x0000009b, - TD_PERF_SEL_burst_bin_bvh4_box_nodes_8to16__GFX103PLUSEXCLUSIVE = 0x0000009c, - TD_PERF_SEL_burst_bin_bvh4_box_nodes_gt16__GFX103PLUSEXCLUSIVE = 0x0000009d, - TD_PERF_SEL_burst_bin_bvh4_tri_nodes_1__GFX103PLUSEXCLUSIVE = 0x0000009e, - TD_PERF_SEL_burst_bin_bvh4_tri_nodes_2to8__GFX103PLUSEXCLUSIVE = 0x0000009f, - TD_PERF_SEL_burst_bin_bvh4_tri_nodes_9to16__GFX103PLUSEXCLUSIVE = 0x000000a0, - TD_PERF_SEL_burst_bin_bvh4_tri_nodes_gt16__GFX103PLUSEXCLUSIVE = 0x000000a1, - TD_PERF_SEL_burst_bin_bvh4_dropped_nodes_1__GFX103PLUSEXCLUSIVE = 0x000000a2, - TD_PERF_SEL_burst_bin_bvh4_dropped_nodes_2to8__GFX103PLUSEXCLUSIVE = 0x000000a3, - TD_PERF_SEL_burst_bin_bvh4_dropped_nodes_9to16__GFX103PLUSEXCLUSIVE = 0x000000a4, - TD_PERF_SEL_burst_bin_bvh4_dropped_nodes_gt16__GFX103PLUSEXCLUSIVE = 0x000000a5, - TD_PERF_SEL_burst_bin_bvh4_invalid_nodes_1__GFX103PLUSEXCLUSIVE = 0x000000a6, - TD_PERF_SEL_burst_bin_bvh4_invalid_nodes_2to8__GFX103PLUSEXCLUSIVE = 0x000000a7, - 
TD_PERF_SEL_burst_bin_bvh4_invalid_nodes_9to16__GFX103PLUSEXCLUSIVE = 0x000000a8, - TD_PERF_SEL_burst_bin_bvh4_invalid_nodes_gt16__GFX103PLUSEXCLUSIVE = 0x000000a9, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_0__GFX103PLUSEXCLUSIVE = 0x000000aa, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_1__GFX103PLUSEXCLUSIVE = 0x000000ab, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_2to31__GFX103PLUSEXCLUSIVE = 0x000000ac, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_32to127__GFX103PLUSEXCLUSIVE = 0x000000ad, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_128to511__GFX103PLUSEXCLUSIVE = 0x000000ae, - TD_PERF_SEL_bubble_bin_ta_waiting_for_tc_data_gt511__GFX103PLUSEXCLUSIVE = 0x000000af, - TD_PERF_SEL_bubble_bin_lds_stall_1to3__GFX103PLUSEXCLUSIVE = 0x000000b0, - TD_PERF_SEL_bubble_bin_lds_stall_4to7__GFX103PLUSEXCLUSIVE = 0x000000b1, - TD_PERF_SEL_bubble_bin_lds_stall_8to15__GFX103PLUSEXCLUSIVE = 0x000000b2, - TD_PERF_SEL_bubble_bin_lds_stall_gt15__GFX103PLUSEXCLUSIVE = 0x000000b3, - TD_PERF_SEL_preempting_nofilter_max_cnt__GFX103PLUSEXCLUSIVE = 0x000000b4, - TD_PERF_SEL_sampler_lerp0_active__GFX103PLUSEXCLUSIVE = 0x000000b5, - TD_PERF_SEL_sampler_lerp1_active__GFX103PLUSEXCLUSIVE = 0x000000b6, - TD_PERF_SEL_sampler_lerp2_active__GFX103PLUSEXCLUSIVE = 0x000000b7, - TD_PERF_SEL_sampler_lerp3_active__GFX103PLUSEXCLUSIVE = 0x000000b8, - TD_PERF_SEL_nofilter_total_num_comps_to_lds__GFX103PLUSEXCLUSIVE = 0x000000b9, - TD_PERF_SEL_nofilter_dword_cycling_2cycles__GFX103PLUSEXCLUSIVE = 0x000000bd, - TD_PERF_SEL_nofilter_dword_cycling_4cycles__GFX103PLUSEXCLUSIVE = 0x000000be, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE = 0x000000bf, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - TD_PERF_SEL_sampler_preformatter_sclk_en__GFX104PLUS = 0x00000008, - TD_PERF_SEL_ray_tracing_bvh4_sclk_en__GFX104PLUS = 0x00000016, - TD_PERF_SEL_ray_tracing_bvh4_ip_sclk_en__GFX104PLUS = 0x00000017, -#endif - 
TD_PERF_SEL_sampler_lerp_busy__GFX10PLUS = 0x00000003, - TD_PERF_SEL_sampler_out_busy__GFX10PLUS = 0x00000004, - TD_PERF_SEL_nofilter_busy__GFX10PLUS = 0x00000005, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - TD_PERF_SEL_sampler_bilerp_sclk_en__GFX11 = 0x00000009, - TD_PERF_SEL_sampler_bypass_sclk_en__GFX11 = 0x0000000a, - TD_PERF_SEL_sampler_minmax_sclk_en__GFX11 = 0x0000000b, - TD_PERF_SEL_sampler_accum_sclk_en__GFX11 = 0x0000000c, - TD_PERF_SEL_sampler_format_flt_sclk_en__GFX11 = 0x0000000d, - TD_PERF_SEL_sampler_format_fxdpt_sclk_en__GFX11 = 0x0000000e, - TD_PERF_SEL_sampler_out_sclk_en__GFX11 = 0x0000000f, - TD_PERF_SEL_nofilter_sclk_en__GFX11 = 0x00000010, - TD_PERF_SEL_nofilter_d32_sclk_en__GFX11 = 0x00000011, - TD_PERF_SEL_nofilter_d16_sclk_en__GFX11 = 0x00000012, - TD_PERF_SEL_ray_tracing_bvh4_instr_invld_thread_cnt__GFX11 = 0x000000c0, - TD_PERF_SEL_ray_tracing_bvh4_box_sort_closest_child__GFX11 = 0x000000c1, - TD_PERF_SEL_ray_tracing_bvh4_box_sort_largest_first__GFX11 = 0x000000c2, - TD_PERF_SEL_ray_tracing_bvh4_box_sort_closest_midpoint__GFX11 = 0x000000c3, - TD_PERF_SEL_store_preempts_a_load__GFX11 = 0x000000c4, -#endif -} TD_PERFCOUNT_SEL; - -constexpr unsigned int MaxTdPerfcountSelGfx09 = TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09; -constexpr unsigned int MaxTdPerfcountSelGfx101 = TD_PERF_SEL_nofilter_popcount_dmask_lt_num_comp_of_fmt__GFX101; -constexpr unsigned int MaxTdPerfcountSelGfx103 = TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxTdPerfcountSelGfx11 = TD_PERF_SEL_store_preempts_a_load__GFX11; -#endif - -typedef enum TEX_BC_SWIZZLE { - TEX_BC_Swizzle_XYZW = 0x00000000, - TEX_BC_Swizzle_XWYZ = 0x00000001, - TEX_BC_Swizzle_WZYX = 0x00000002, - TEX_BC_Swizzle_WXYZ = 0x00000003, - TEX_BC_Swizzle_ZYXW = 0x00000004, - TEX_BC_Swizzle_YXWZ = 0x00000005, -} TEX_BC_SWIZZLE; - 
-typedef enum TEX_BORDER_COLOR_TYPE { - TEX_BorderColor_TransparentBlack = 0x00000000, - TEX_BorderColor_OpaqueBlack = 0x00000001, - TEX_BorderColor_OpaqueWhite = 0x00000002, - TEX_BorderColor_Register = 0x00000003, -} TEX_BORDER_COLOR_TYPE; - -typedef enum TEX_CHROMA_KEY { - TEX_ChromaKey_Disabled = 0x00000000, - TEX_ChromaKey_Kill = 0x00000001, - TEX_ChromaKey_Blend = 0x00000002, - TEX_ChromaKey_RESERVED_3 = 0x00000003, -} TEX_CHROMA_KEY; - -typedef enum TEX_CLAMP { - TEX_Clamp_Repeat = 0x00000000, - TEX_Clamp_Mirror = 0x00000001, - TEX_Clamp_ClampToLast = 0x00000002, - TEX_Clamp_MirrorOnceToLast = 0x00000003, - TEX_Clamp_ClampHalfToBorder = 0x00000004, - TEX_Clamp_MirrorOnceHalfToBorder = 0x00000005, - TEX_Clamp_ClampToBorder = 0x00000006, - TEX_Clamp_MirrorOnceToBorder = 0x00000007, -} TEX_CLAMP; - -typedef enum TEX_COORD_TYPE { - TEX_CoordType_Unnormalized = 0x00000000, - TEX_CoordType_Normalized = 0x00000001, -} TEX_COORD_TYPE; - -typedef enum TEX_DEPTH_COMPARE_FUNCTION { - TEX_DepthCompareFunction_Never = 0x00000000, - TEX_DepthCompareFunction_Less = 0x00000001, - TEX_DepthCompareFunction_Equal = 0x00000002, - TEX_DepthCompareFunction_LessEqual = 0x00000003, - TEX_DepthCompareFunction_Greater = 0x00000004, - TEX_DepthCompareFunction_NotEqual = 0x00000005, - TEX_DepthCompareFunction_GreaterEqual = 0x00000006, - TEX_DepthCompareFunction_Always = 0x00000007, -} TEX_DEPTH_COMPARE_FUNCTION; - -typedef enum TEX_DIM { - TEX_Dim_1D = 0x00000000, - TEX_Dim_2D = 0x00000001, - TEX_Dim_3D = 0x00000002, - TEX_Dim_CubeMap = 0x00000003, - TEX_Dim_1DArray = 0x00000004, - TEX_Dim_2DArray = 0x00000005, - TEX_Dim_2D_MSAA = 0x00000006, - TEX_Dim_2DArray_MSAA = 0x00000007, -} TEX_DIM; - -typedef enum TEX_FORMAT_COMP { - TEX_FormatComp_Unsigned = 0x00000000, - TEX_FormatComp_Signed = 0x00000001, - TEX_FormatComp_UnsignedBiased = 0x00000002, - TEX_FormatComp_RESERVED_3 = 0x00000003, -} TEX_FORMAT_COMP; - -typedef enum TEX_MAX_ANISO_RATIO { - TEX_MaxAnisoRatio_1to1 = 0x00000000, - 
TEX_MaxAnisoRatio_2to1 = 0x00000001, - TEX_MaxAnisoRatio_4to1 = 0x00000002, - TEX_MaxAnisoRatio_8to1 = 0x00000003, - TEX_MaxAnisoRatio_16to1 = 0x00000004, - TEX_MaxAnisoRatio_RESERVED_5 = 0x00000005, - TEX_MaxAnisoRatio_RESERVED_6 = 0x00000006, - TEX_MaxAnisoRatio_RESERVED_7 = 0x00000007, -} TEX_MAX_ANISO_RATIO; - -typedef enum TEX_MIP_FILTER { - TEX_MipFilter_None = 0x00000000, - TEX_MipFilter_Point = 0x00000001, - TEX_MipFilter_Linear = 0x00000002, - TEX_MipFilter_Point_Aniso_Adj = 0x00000003, -} TEX_MIP_FILTER; - -typedef enum TEX_REQUEST_SIZE { - TEX_RequestSize_32B = 0x00000000, - TEX_RequestSize_64B = 0x00000001, - TEX_RequestSize_128B = 0x00000002, - TEX_RequestSize_2X64B = 0x00000003, -} TEX_REQUEST_SIZE; - -typedef enum TEX_SAMPLER_TYPE { - TEX_SamplerType_Invalid = 0x00000000, - TEX_SamplerType_Valid = 0x00000001, -} TEX_SAMPLER_TYPE; - -typedef enum TEX_XY_FILTER { - TEX_XYFilter_Point = 0x00000000, - TEX_XYFilter_Linear = 0x00000001, - TEX_XYFilter_AnisoPoint = 0x00000002, - TEX_XYFilter_AnisoLinear = 0x00000003, -} TEX_XY_FILTER; - -typedef enum TEX_Z_FILTER { - TEX_ZFilter_None = 0x00000000, - TEX_ZFilter_Point = 0x00000001, - TEX_ZFilter_Linear = 0x00000002, - TEX_ZFilter_RESERVED_3 = 0x00000003, -} TEX_Z_FILTER; - -typedef enum UMC_PERFCOUNT_SELECT { - UMC_PERF_SEL_IncomingPh__GFX101 = 0x00000001, - UMC_PERF_SEL_IncomingPm__GFX101 = 0x00000002, - UMC_PERF_SEL_IncomingPc__GFX101 = 0x00000003, - UMC_PERF_SEL_IncomingPgtMiss__GFX101 = 0x00000004, - UMC_PERF_SEL_ActCmd__GFX101 = 0x00000005, - UMC_PERF_SEL_PchgCmd__GFX101 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__GFX101 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__GFX101 = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__GFX101 = 0x00000009, - UMC_PERF_SEL_CasCmd__GFX101 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__GFX101 = 0x0000000b, - UMC_PERF_SEL_CasWithBurstCh__GFX101 = 0x0000000c, - UMC_PERF_SEL_TurnRndRdWr__GFX101 = 0x0000000d, - UMC_PERF_SEL_Back2BackSame__GFX101 = 0x0000000e, - 
UMC_PERF_SEL_Back2BackDiffB__GFX101 = 0x0000000f, - UMC_PERF_SEL_Back2BackDiffCidSameCS__GFX101 = 0x00000010, - UMC_PERF_SEL_Back2BackDiffCSSameDimm__GFX101 = 0x00000011, - UMC_PERF_SEL_Back2BackDiffDimm__GFX101 = 0x00000012, - UMC_PERF_SEL_CmdSlotClks__GFX101 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__GFX101 = 0x00000014, - UMC_PERF_SEL_PageTableOverflow__GFX101 = 0x00000015, - UMC_PERF_SEL_CmdError__GFX101 = 0x00000016, - UMC_PERF_SEL_DataError__GFX101 = 0x00000017, - UMC_PERF_SEL_RetryParity__GFX101 = 0x00000018, - UMC_PERF_SEL_RetryCRC__GFX101 = 0x00000019, - UMC_PERF_SEL_BypassSat__GFX101 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__GFX101 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__GFX101 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy_00__GFX101 = 0x0000001d, - UMC_PERF_SEL_DcqOccupancy_25__GFX101 = 0x0000001e, - UMC_PERF_SEL_DcqOccupancy_50__GFX101 = 0x0000001f, - UMC_PERF_SEL_DcqOccupancy_75__GFX101 = 0x00000020, - UMC_PERF_SEL_DcqOccupancy_90__GFX101 = 0x00000021, - UMC_PERF_SEL_SelfRefreshClks__GFX101 = 0x00000022, - UMC_PERF_SEL_PrechargePwrDownAllClks__GFX101 = 0x00000023, - UMC_PERF_SEL_PendReqCnt__GFX101 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__GFX101 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__GFX101 = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__GFX101 = 0x00000027, - UMC_PERF_SEL_Switch2Read__GFX101 = 0x0000002a, - UMC_PERF_SEL_Switch2Write__GFX101 = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__GFX101 = 0x0000002c, - UMC_PERF_SEL_UrgRefStbkClks__GFX101 = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__GFX101 = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__GFX101 = 0x00000031, - UMC_PERF_SEL_RefreshType__GFX101 = 0x00000032, -#if CHIP_HDR_NAVI21 - UMC_PERF_SEL_NONE__NV21 = 0x00000000, - UMC_PERF_SEL_SdpPh__NV21 = 0x00000001, - UMC_PERF_SEL_SdpPm__NV21 = 0x00000002, - UMC_PERF_SEL_SdpPc__NV21 = 0x00000003, - UMC_PERF_SEL_ActCmd__NV21 = 0x00000005, - UMC_PERF_SEL_PchgCmd__NV21 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__NV21 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__NV21 
= 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__NV21 = 0x00000009, - UMC_PERF_SEL_CasCmd__NV21 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__NV21 = 0x0000000b, - UMC_PERF_SEL_TurnRndRdWr__NV21 = 0x0000000d, - UMC_PERF_SEL_CmdSlotClks__NV21 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__NV21 = 0x00000014, - UMC_PERF_SEL_CmdError__NV21 = 0x00000016, - UMC_PERF_SEL_DataError__NV21 = 0x00000017, - UMC_PERF_SEL_RetryParity__NV21 = 0x00000018, - UMC_PERF_SEL_RetryCRC__NV21 = 0x00000019, - UMC_PERF_SEL_BypassSat__NV21 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__NV21 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__NV21 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy__NV21 = 0x0000001d, - UMC_PERF_SEL_SelfRefreshClks__NV21 = 0x00000022, - UMC_PERF_SEL_PendReqCnt__NV21 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__NV21 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__NV21 = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__NV21 = 0x00000027, - UMC_PERF_SEL_ArbRdWrPhase__NV21 = 0x0000002a, - UMC_PERF_SEL_WriteCycle__NV21 = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__NV21 = 0x0000002c, - UMC_PERF_SEL_UrgRefStBkClks__NV21 = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__NV21 = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__NV21 = 0x00000031, - UMC_PERF_SEL_SdpCoherent__NV21 = 0x00000033, - UMC_PERF_SEL_SdpChngPri__NV21 = 0x00000034, - UMC_PERF_SEL_SdpCancel__NV21 = 0x00000035, - UMC_PERF_SEL_SdpBurstLength__NV21 = 0x00000036, - UMC_PERF_SEL_SdpBurstCnt__NV21 = 0x00000037, - UMC_PERF_SEL_SdpGapLen__NV21 = 0x00000038, - UMC_PERF_SEL_SdpGapCnt__NV21 = 0x00000039, - UMC_PERF_SEL_CasCmdWSM__NV21 = 0x0000003a, - UMC_PERF_SEL_CasCmdWDM__NV21 = 0x0000003b, - UMC_PERF_SEL_CasBurstLength__NV21 = 0x0000003c, - UMC_PERF_SEL_CasBurstCnt__NV21 = 0x0000003d, - UMC_PERF_SEL_CasCmdCW__NV21 = 0x0000003e, - UMC_PERF_SEL_TurnRndRdWr_n__NV21 = 0x0000003f, - UMC_PERF_SEL_TurnRndRdWr_RdThresh__NV21 = 0x00000040, - UMC_PERF_SEL_TurnRndWrRd_noWr__NV21 = 0x00000041, - UMC_PERF_SEL_TurnRndWrRd_WrThresh__NV21 = 0x00000042, - 
UMC_PERF_SEL_TurnRndWrRd_RdUrg__NV21 = 0x00000043, - UMC_PERF_SEL_PrechargePwrDownEntry__NV21 = 0x00000044, - UMC_PERF_SEL_ThermalThrottleEntry__NV21 = 0x00000045, - UMC_PERF_SEL_PgtActiveBanksCnt__NV21 = 0x00000046, - UMC_PERF_SEL_SelfRefreshEntry__NV21 = 0x00000047, - UMC_PERF_SEL_RdRspBusyNoCrdt__NV21 = 0x00000048, - UMC_PERF_SEL_RdstallRdRspBufNotRdy__NV21 = 0x00000049, - UMC_PERF_SEL_RdrspBusyVdciNotRdy__NV21 = 0x0000004a, - UMC_PERF_SEL_DfiLpReq__NV21 = 0x0000004b, - UMC_PERF_SEL_DfiLpCyc__NV21 = 0x0000004c, - UMC_PERF_SEL_BubbleOverall__NV21 = 0x0000004d, - UMC_PERF_SEL_BeqEdcErrB0__NV21 = 0x0000004e, - UMC_PERF_SEL_BeqEdcErrB1__NV21 = 0x0000004f, - UMC_PERF_SEL_TempOverThresh__NV21 = 0x00000052, - UMC_PERF_SEL_TempCnt__NV21 = 0x00000053, -#endif -#if CHIP_HDR_NAVI22 - UMC_PERF_SEL_NONE__NV22 = 0x00000000, - UMC_PERF_SEL_SdpPh__NV22 = 0x00000001, - UMC_PERF_SEL_SdpPm__NV22 = 0x00000002, - UMC_PERF_SEL_SdpPc__NV22 = 0x00000003, - UMC_PERF_SEL_ActCmd__NV22 = 0x00000005, - UMC_PERF_SEL_PchgCmd__NV22 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__NV22 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__NV22 = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__NV22 = 0x00000009, - UMC_PERF_SEL_CasCmd__NV22 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__NV22 = 0x0000000b, - UMC_PERF_SEL_TurnRndRdWr__NV22 = 0x0000000d, - UMC_PERF_SEL_CmdSlotClks__NV22 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__NV22 = 0x00000014, - UMC_PERF_SEL_CmdError__NV22 = 0x00000016, - UMC_PERF_SEL_DataError__NV22 = 0x00000017, - UMC_PERF_SEL_RetryParity__NV22 = 0x00000018, - UMC_PERF_SEL_RetryCRC__NV22 = 0x00000019, - UMC_PERF_SEL_BypassSat__NV22 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__NV22 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__NV22 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy__NV22 = 0x0000001d, - UMC_PERF_SEL_SelfRefreshClks__NV22 = 0x00000022, - UMC_PERF_SEL_PendReqCnt__NV22 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__NV22 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__NV22 = 0x00000026, - 
UMC_PERF_SEL_BeqEdcErr__NV22 = 0x00000027, - UMC_PERF_SEL_ArbRdWrPhase__NV22 = 0x0000002a, - UMC_PERF_SEL_WriteCycle__NV22 = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__NV22 = 0x0000002c, - UMC_PERF_SEL_UrgRefStBkClks__NV22 = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__NV22 = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__NV22 = 0x00000031, - UMC_PERF_SEL_SdpCoherent__NV22 = 0x00000033, - UMC_PERF_SEL_SdpChngPri__NV22 = 0x00000034, - UMC_PERF_SEL_SdpCancel__NV22 = 0x00000035, - UMC_PERF_SEL_SdpBurstLength__NV22 = 0x00000036, - UMC_PERF_SEL_SdpBurstCnt__NV22 = 0x00000037, - UMC_PERF_SEL_SdpGapLen__NV22 = 0x00000038, - UMC_PERF_SEL_SdpGapCnt__NV22 = 0x00000039, - UMC_PERF_SEL_CasCmdWSM__NV22 = 0x0000003a, - UMC_PERF_SEL_CasCmdWDM__NV22 = 0x0000003b, - UMC_PERF_SEL_CasBurstLength__NV22 = 0x0000003c, - UMC_PERF_SEL_CasBurstCnt__NV22 = 0x0000003d, - UMC_PERF_SEL_CasCmdCW__NV22 = 0x0000003e, - UMC_PERF_SEL_TurnRndRdWr_n__NV22 = 0x0000003f, - UMC_PERF_SEL_TurnRndRdWr_RdThresh__NV22 = 0x00000040, - UMC_PERF_SEL_TurnRndWrRd_noWr__NV22 = 0x00000041, - UMC_PERF_SEL_TurnRndWrRd_WrThresh__NV22 = 0x00000042, - UMC_PERF_SEL_TurnRndWrRd_RdUrg__NV22 = 0x00000043, - UMC_PERF_SEL_PrechargePwrDownEntry__NV22 = 0x00000044, - UMC_PERF_SEL_ThermalThrottleEntry__NV22 = 0x00000045, - UMC_PERF_SEL_PgtActiveBanksCnt__NV22 = 0x00000046, - UMC_PERF_SEL_SelfRefreshEntry__NV22 = 0x00000047, - UMC_PERF_SEL_RdRspBusyNoCrdt__NV22 = 0x00000048, - UMC_PERF_SEL_RdstallRdRspBufNotRdy__NV22 = 0x00000049, - UMC_PERF_SEL_RdrspBusyVdciNotRdy__NV22 = 0x0000004a, - UMC_PERF_SEL_DfiLpReq__NV22 = 0x0000004b, - UMC_PERF_SEL_DfiLpCyc__NV22 = 0x0000004c, - UMC_PERF_SEL_BubbleOverall__NV22 = 0x0000004d, - UMC_PERF_SEL_BeqEdcErrB0__NV22 = 0x0000004e, - UMC_PERF_SEL_BeqEdcErrB1__NV22 = 0x0000004f, - UMC_PERF_SEL_TempOverThresh__NV22 = 0x00000052, - UMC_PERF_SEL_TempCnt__NV22 = 0x00000053, -#endif -#if CHIP_HDR_NAVI23 - UMC_PERF_SEL_NONE__NV23 = 0x00000000, - UMC_PERF_SEL_SdpPh__NV23 = 0x00000001, - UMC_PERF_SEL_SdpPm__NV23 = 
0x00000002, - UMC_PERF_SEL_SdpPc__NV23 = 0x00000003, - UMC_PERF_SEL_ActCmd__NV23 = 0x00000005, - UMC_PERF_SEL_PchgCmd__NV23 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__NV23 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__NV23 = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__NV23 = 0x00000009, - UMC_PERF_SEL_CasCmd__NV23 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__NV23 = 0x0000000b, - UMC_PERF_SEL_TurnRndRdWr__NV23 = 0x0000000d, - UMC_PERF_SEL_CmdSlotClks__NV23 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__NV23 = 0x00000014, - UMC_PERF_SEL_CmdError__NV23 = 0x00000016, - UMC_PERF_SEL_DataError__NV23 = 0x00000017, - UMC_PERF_SEL_RetryParity__NV23 = 0x00000018, - UMC_PERF_SEL_RetryCRC__NV23 = 0x00000019, - UMC_PERF_SEL_BypassSat__NV23 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__NV23 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__NV23 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy__NV23 = 0x0000001d, - UMC_PERF_SEL_SelfRefreshClks__NV23 = 0x00000022, - UMC_PERF_SEL_PendReqCnt__NV23 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__NV23 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__NV23 = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__NV23 = 0x00000027, - UMC_PERF_SEL_ArbRdWrPhase__NV23 = 0x0000002a, - UMC_PERF_SEL_WriteCycle__NV23 = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__NV23 = 0x0000002c, - UMC_PERF_SEL_UrgRefStBkClks__NV23 = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__NV23 = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__NV23 = 0x00000031, - UMC_PERF_SEL_SdpCoherent__NV23 = 0x00000033, - UMC_PERF_SEL_SdpChngPri__NV23 = 0x00000034, - UMC_PERF_SEL_SdpCancel__NV23 = 0x00000035, - UMC_PERF_SEL_SdpBurstLength__NV23 = 0x00000036, - UMC_PERF_SEL_SdpBurstCnt__NV23 = 0x00000037, - UMC_PERF_SEL_SdpGapLen__NV23 = 0x00000038, - UMC_PERF_SEL_SdpGapCnt__NV23 = 0x00000039, - UMC_PERF_SEL_CasCmdWSM__NV23 = 0x0000003a, - UMC_PERF_SEL_CasCmdWDM__NV23 = 0x0000003b, - UMC_PERF_SEL_CasBurstLength__NV23 = 0x0000003c, - UMC_PERF_SEL_CasBurstCnt__NV23 = 0x0000003d, - UMC_PERF_SEL_CasCmdCW__NV23 = 0x0000003e, - 
UMC_PERF_SEL_TurnRndRdWr_n__NV23 = 0x0000003f, - UMC_PERF_SEL_TurnRndRdWr_RdThresh__NV23 = 0x00000040, - UMC_PERF_SEL_TurnRndWrRd_noWr__NV23 = 0x00000041, - UMC_PERF_SEL_TurnRndWrRd_WrThresh__NV23 = 0x00000042, - UMC_PERF_SEL_TurnRndWrRd_RdUrg__NV23 = 0x00000043, - UMC_PERF_SEL_PrechargePwrDownEntry__NV23 = 0x00000044, - UMC_PERF_SEL_ThermalThrottleEntry__NV23 = 0x00000045, - UMC_PERF_SEL_PgtActiveBanksCnt__NV23 = 0x00000046, - UMC_PERF_SEL_SelfRefreshEntry__NV23 = 0x00000047, - UMC_PERF_SEL_RdRspBusyNoCrdt__NV23 = 0x00000048, - UMC_PERF_SEL_RdstallRdRspBufNotRdy__NV23 = 0x00000049, - UMC_PERF_SEL_RdrspBusyVdciNotRdy__NV23 = 0x0000004a, - UMC_PERF_SEL_DfiLpReq__NV23 = 0x0000004b, - UMC_PERF_SEL_DfiLpCyc__NV23 = 0x0000004c, - UMC_PERF_SEL_BubbleOverall__NV23 = 0x0000004d, - UMC_PERF_SEL_BeqEdcErrB0__NV23 = 0x0000004e, - UMC_PERF_SEL_BeqEdcErrB1__NV23 = 0x0000004f, - UMC_PERF_SEL_TempOverThresh__NV23 = 0x00000052, - UMC_PERF_SEL_TempCnt__NV23 = 0x00000053, -#endif -#if CHIP_HDR_NAVI24 - UMC_PERF_SEL_NONE__NV24 = 0x00000000, - UMC_PERF_SEL_SdpPh__NV24 = 0x00000001, - UMC_PERF_SEL_SdpPm__NV24 = 0x00000002, - UMC_PERF_SEL_SdpPc__NV24 = 0x00000003, - UMC_PERF_SEL_ActCmd__NV24 = 0x00000005, - UMC_PERF_SEL_PchgCmd__NV24 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__NV24 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__NV24 = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__NV24 = 0x00000009, - UMC_PERF_SEL_CasCmd__NV24 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__NV24 = 0x0000000b, - UMC_PERF_SEL_TurnRndRdWr__NV24 = 0x0000000d, - UMC_PERF_SEL_CmdSlotClks__NV24 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__NV24 = 0x00000014, - UMC_PERF_SEL_CmdError__NV24 = 0x00000016, - UMC_PERF_SEL_DataError__NV24 = 0x00000017, - UMC_PERF_SEL_RetryParity__NV24 = 0x00000018, - UMC_PERF_SEL_RetryCRC__NV24 = 0x00000019, - UMC_PERF_SEL_BypassSat__NV24 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__NV24 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__NV24 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy__NV24 = 0x0000001d, 
- UMC_PERF_SEL_SelfRefreshClks__NV24 = 0x00000022, - UMC_PERF_SEL_PendReqCnt__NV24 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__NV24 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__NV24 = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__NV24 = 0x00000027, - UMC_PERF_SEL_ArbRdWrPhase__NV24 = 0x0000002a, - UMC_PERF_SEL_WriteCycle__NV24 = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__NV24 = 0x0000002c, - UMC_PERF_SEL_UrgRefStBkClks__NV24 = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__NV24 = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__NV24 = 0x00000031, - UMC_PERF_SEL_SdpCoherent__NV24 = 0x00000033, - UMC_PERF_SEL_SdpChngPri__NV24 = 0x00000034, - UMC_PERF_SEL_SdpCancel__NV24 = 0x00000035, - UMC_PERF_SEL_SdpBurstLength__NV24 = 0x00000036, - UMC_PERF_SEL_SdpBurstCnt__NV24 = 0x00000037, - UMC_PERF_SEL_SdpGapLen__NV24 = 0x00000038, - UMC_PERF_SEL_SdpGapCnt__NV24 = 0x00000039, - UMC_PERF_SEL_CasCmdWSM__NV24 = 0x0000003a, - UMC_PERF_SEL_CasCmdWDM__NV24 = 0x0000003b, - UMC_PERF_SEL_CasBurstLength__NV24 = 0x0000003c, - UMC_PERF_SEL_CasBurstCnt__NV24 = 0x0000003d, - UMC_PERF_SEL_CasCmdCW__NV24 = 0x0000003e, - UMC_PERF_SEL_TurnRndRdWr_n__NV24 = 0x0000003f, - UMC_PERF_SEL_TurnRndRdWr_RdThresh__NV24 = 0x00000040, - UMC_PERF_SEL_TurnRndWrRd_noWr__NV24 = 0x00000041, - UMC_PERF_SEL_TurnRndWrRd_WrThresh__NV24 = 0x00000042, - UMC_PERF_SEL_TurnRndWrRd_RdUrg__NV24 = 0x00000043, - UMC_PERF_SEL_PrechargePwrDownEntry__NV24 = 0x00000044, - UMC_PERF_SEL_ThermalThrottleEntry__NV24 = 0x00000045, - UMC_PERF_SEL_PgtActiveBanksCnt__NV24 = 0x00000046, - UMC_PERF_SEL_SelfRefreshEntry__NV24 = 0x00000047, - UMC_PERF_SEL_RdRspBusyNoCrdt__NV24 = 0x00000048, - UMC_PERF_SEL_RdstallRdRspBufNotRdy__NV24 = 0x00000049, - UMC_PERF_SEL_RdrspBusyVdciNotRdy__NV24 = 0x0000004a, - UMC_PERF_SEL_DfiLpReq__NV24 = 0x0000004b, - UMC_PERF_SEL_DfiLpCyc__NV24 = 0x0000004c, - UMC_PERF_SEL_BubbleOverall__NV24 = 0x0000004d, - UMC_PERF_SEL_BeqEdcErrB0__NV24 = 0x0000004e, - UMC_PERF_SEL_BeqEdcErrB1__NV24 = 0x0000004f, - UMC_PERF_SEL_TempOverThresh__NV24 = 
0x00000052, - UMC_PERF_SEL_TempCnt__NV24 = 0x00000053, -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - UMC_PERF_SEL_NONE__NV3X = 0x00000000, - UMC_PERF_SEL_SdpPh__NV3X = 0x00000001, - UMC_PERF_SEL_SdpPm__NV3X = 0x00000002, - UMC_PERF_SEL_SdpPc__NV3X = 0x00000003, - UMC_PERF_SEL_ActCmd__NV3X = 0x00000005, - UMC_PERF_SEL_PchgCmd__NV3X = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__NV3X = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__NV3X = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__NV3X = 0x00000009, - UMC_PERF_SEL_CasCmd__NV3X = 0x0000000a, - UMC_PERF_SEL_CasWithAP__NV3X = 0x0000000b, - UMC_PERF_SEL_TurnRndRdWr__NV3X = 0x0000000d, - UMC_PERF_SEL_CmdSlotClks__NV3X = 0x00000013, - UMC_PERF_SEL_DataSlotClks__NV3X = 0x00000014, - UMC_PERF_SEL_CmdError__NV3X = 0x00000016, - UMC_PERF_SEL_DataError__NV3X = 0x00000017, - UMC_PERF_SEL_RetryParity__NV3X = 0x00000018, - UMC_PERF_SEL_RetryCRC__NV3X = 0x00000019, - UMC_PERF_SEL_BypassSat__NV3X = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__NV3X = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__NV3X = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy__NV3X = 0x0000001d, - UMC_PERF_SEL_SelfRefreshClks__NV3X = 0x00000022, - UMC_PERF_SEL_PendReqCnt__NV3X = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__NV3X = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__NV3X = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__NV3X = 0x00000027, - UMC_PERF_SEL_ArbRdWrPhase__NV3X = 0x0000002a, - UMC_PERF_SEL_WriteCycle__NV3X = 0x0000002b, - UMC_PERF_SEL_ArbRefReqClks__NV3X = 0x0000002c, - UMC_PERF_SEL_UrgRefStBkClks__NV3X = 0x0000002d, - UMC_PERF_SEL_RtCalEvtCnt__NV3X = 0x00000030, - UMC_PERF_SEL_RtCalBlockClks__NV3X = 0x00000031, - UMC_PERF_SEL_SdpCoherent__NV3X = 0x00000033, - UMC_PERF_SEL_SdpChngPri__NV3X = 0x00000034, - UMC_PERF_SEL_SdpCancel__NV3X = 0x00000035, - UMC_PERF_SEL_SdpBurstLength__NV3X = 0x00000036, - UMC_PERF_SEL_SdpBurstCnt__NV3X = 0x00000037, - UMC_PERF_SEL_SdpGapLen__NV3X = 0x00000038, - UMC_PERF_SEL_SdpGapCnt__NV3X = 0x00000039, - 
UMC_PERF_SEL_CasCmdWSM__NV3X = 0x0000003a, - UMC_PERF_SEL_CasCmdWDM__NV3X = 0x0000003b, - UMC_PERF_SEL_CasBurstLength__NV3X = 0x0000003c, - UMC_PERF_SEL_CasBurstCnt__NV3X = 0x0000003d, - UMC_PERF_SEL_CasCmdCW__NV3X = 0x0000003e, - UMC_PERF_SEL_TurnRndRdWr_n__NV3X = 0x0000003f, - UMC_PERF_SEL_TurnRndRdWr_RdThresh__NV3X = 0x00000040, - UMC_PERF_SEL_TurnRndWrRd_noWr__NV3X = 0x00000041, - UMC_PERF_SEL_TurnRndWrRd_WrThresh__NV3X = 0x00000042, - UMC_PERF_SEL_TurnRndWrRd_RdUrg__NV3X = 0x00000043, - UMC_PERF_SEL_PrechargePwrDownEntry__NV3X = 0x00000044, - UMC_PERF_SEL_ThermalThrottleEntry__NV3X = 0x00000045, - UMC_PERF_SEL_PgtActiveBanksCnt__NV3X = 0x00000046, - UMC_PERF_SEL_SelfRefreshEntry__NV3X = 0x00000047, - UMC_PERF_SEL_RdRspBusyNoCrdt__NV3X = 0x00000048, - UMC_PERF_SEL_RdstallRdRspBufNotRdy__NV3X = 0x00000049, - UMC_PERF_SEL_RdrspBusyVdciNotRdy__NV3X = 0x0000004a, - UMC_PERF_SEL_DfiLpReq__NV3X = 0x0000004b, - UMC_PERF_SEL_DfiLpCyc__NV3X = 0x0000004c, - UMC_PERF_SEL_BubbleOverall__NV3X = 0x0000004d, - UMC_PERF_SEL_BeqEdcErrB0__NV3X = 0x0000004e, - UMC_PERF_SEL_BeqEdcErrB1__NV3X = 0x0000004f, - UMC_PERF_SEL_BeqEdcErrB2__NV3X = 0x00000050, - UMC_PERF_SEL_BeqEdcErrB3__NV3X = 0x00000051, - UMC_PERF_SEL_TempOverThresh__NV3X = 0x00000052, - UMC_PERF_SEL_TempCnt__NV3X = 0x00000053, - UMC_PERF_SEL_ReqFullEntry__NV3X = 0x00000054, - UMC_PERF_SEL_NumReqs__NV3X = 0x00000055, - UMC_PERF_SEL_CasWithDeAlloc__NV3X = 0x00000056, - UMC_PERF_SEL_SdpNewEntry__NV3X = 0x00000057, - UMC_PERF_SEL_SdpHitEntry__NV3X = 0x00000058, - UMC_PERF_SEL_RfmCmd__NV3X = 0x00000059, - UMC_PERF_SEL_RfmBlockClk__NV3X = 0x0000005a, - UMC_PERF_SEL_PatrolScrub__NV3X = 0x0000005b, - UMC_PERF_SEL_RedirectScrub__NV3X = 0x0000005c, - UMC_PERF_SEL_PoisonScrub__NV3X = 0x0000005d, - UMC_PERF_SEL_MemClear__NV3X = 0x0000005e, - UMC_PERF_SEL_DroppedRedirectScrub__NV3X = 0x0000005f, - UMC_PERF_SEL_DroppedPoisonScrub__NV3X = 0x00000060, - UMC_PERF_SEL_DroppedScrubWrite__NV3X = 0x00000061, - 
UMC_PERF_SEL_NoRefOverThres__NV3X = 0x00000062, - UMC_PERF_SEL_StoredRefOverThres__NV3X = 0x00000063, - UMC_PERF_SEL_PptRunning__NV3X = 0x00000064, - UMC_PERF_SEL_PptTrig__NV3X = 0x00000065, - UMC_PERF_SEL_PptMinWin__NV3X = 0x00000066, - UMC_PERF_SEL_CrcErr__NV3X = 0x00000067, - UMC_PERF_SEL_EccErr__NV3X = 0x00000068, - UMC_PERF_SEL_DroopCmdThrot__NV3X = 0x00000069, - UMC_PERF_SEL_DroopDetAtWin__NV3X = 0x0000006a, - UMC_PERF_SEL_DroopDetect__NV3X = 0x0000006b, - UMC_PERF_SEL_ClockCount__NV3X = 0x000000ff, -#endif - UMC_PERF_SEL_IncomingPh__VG10 = 0x00000001, - UMC_PERF_SEL_IncomingPm__VG10 = 0x00000002, - UMC_PERF_SEL_IncomingPc__VG10 = 0x00000003, - UMC_PERF_SEL_IncomingPgtMiss__VG10 = 0x00000004, - UMC_PERF_SEL_ActCmd__VG10 = 0x00000005, - UMC_PERF_SEL_PchgCmd__VG10 = 0x00000006, - UMC_PERF_SEL_SpecPchgCmd__VG10 = 0x00000007, - UMC_PERF_SEL_PchgAllCmd__VG10 = 0x00000008, - UMC_PERF_SEL_OtherPchgCmd__VG10 = 0x00000009, - UMC_PERF_SEL_CasCmd__VG10 = 0x0000000a, - UMC_PERF_SEL_CasWithAP__VG10 = 0x0000000b, - UMC_PERF_SEL_CasWithBurstChop__VG10 = 0x0000000c, - UMC_PERF_SEL_TurnRndRdWr__VG10 = 0x0000000d, - UMC_PERF_SEL_Back2BackSameBankGroup__VG10 = 0x0000000e, - UMC_PERF_SEL_Back2BackDiffBankGroupSameCs__VG10 = 0x0000000f, - UMC_PERF_SEL_Back2BackDiffCidSameCS__VG10 = 0x00000010, - UMC_PERF_SEL_Back2BackDiffCSSameDimm__VG10 = 0x00000011, - UMC_PERF_SEL_Back2BackDiffDimm__VG10 = 0x00000012, - UMC_PERF_SEL_CmdSlotClks__VG10 = 0x00000013, - UMC_PERF_SEL_DataSlotClks__VG10 = 0x00000014, - UMC_PERF_SEL_PageTableOverflow__VG10 = 0x00000015, - UMC_PERF_SEL_CmdError__VG10 = 0x00000016, - UMC_PERF_SEL_DataError__VG10 = 0x00000017, - UMC_PERF_SEL_RetryParity__VG10 = 0x00000018, - UMC_PERF_SEL_RetryCRC__VG10 = 0x00000019, - UMC_PERF_SEL_BypassSat__VG10 = 0x0000001a, - UMC_PERF_SEL_PrechagePwrDownClks__VG10 = 0x0000001b, - UMC_PERF_SEL_ThermalThrottleClks__VG10 = 0x0000001c, - UMC_PERF_SEL_DcqOccupancy_00__VG10 = 0x0000001d, - UMC_PERF_SEL_DcqOccupancy_25__VG10 = 0x0000001e, - 
UMC_PERF_SEL_DcqOccupancy_50__VG10 = 0x0000001f, - UMC_PERF_SEL_DcqOccupancy_75__VG10 = 0x00000020, - UMC_PERF_SEL_DcqOccupancy_90__VG10 = 0x00000021, - UMC_PERF_SEL_SelfRefreshClks__VG10 = 0x00000022, - UMC_PERF_SEL_PrechargePwrDownAllClks__VG10 = 0x00000023, - UMC_PERF_SEL_PendReqCnt__VG10 = 0x00000024, - UMC_PERF_SEL_AutoRefCnt__VG10 = 0x00000025, - UMC_PERF_SEL_BeqRdWrCmd__VG10 = 0x00000026, - UMC_PERF_SEL_BeqEdcErr__VG10 = 0x00000027, -} UMC_PERFCOUNT_SELECT; - -constexpr unsigned int MaxUmcPerfcountSelectVg10 = UMC_PERF_SEL_BeqEdcErr__VG10; -constexpr unsigned int MaxUmcPerfcountSelectGfx101 = UMC_PERF_SEL_RefreshType__GFX101; -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxUmcPerfcountSelectNv24 = UMC_PERF_SEL_TempCnt__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxUmcPerfcountSelectNv23 = UMC_PERF_SEL_TempCnt__NV23; -#endif -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxUmcPerfcountSelectNv22 = UMC_PERF_SEL_TempCnt__NV22; -#endif -#if CHIP_HDR_NAVI21 -constexpr unsigned int MaxUmcPerfcountSelectNv21 = UMC_PERF_SEL_TempCnt__NV21; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -constexpr unsigned int MaxUmcPerfcountSelectNv3x = UMC_PERF_SEL_ClockCount__NV3X; -#endif - -typedef enum UTCL1PerfSel { - UTCL1_PERF_SEL_NONE = 0x00000000, - UTCL1_PERF_SEL_REQS = 0x00000001, - UTCL1_PERF_SEL_HITS = 0x00000002, - UTCL1_PERF_SEL_MISSES = 0x00000003, - UTCL1_PERF_SEL_BYPASS_REQS__GFX10 = 0x00000004, - UTCL1_PERF_SEL_HIT_INV_FILTER_REQS__GFX10 = 0x00000005, - UTCL1_PERF_SEL_NUM_SMALLK_PAGES__GFX101 = 0x00000006, - UTCL1_PERF_SEL_NUM_BIGK_PAGES__GFX101 = 0x00000007, - UTCL1_PERF_SEL_TOTAL_UTCL2_REQS__GFX101 = 0x00000008, - UTCL1_PERF_SEL_OUTSTANDING_UTCL2_REQS_ACCUM__GFX101 = 0x00000009, - UTCL1_PERF_SEL_STALL_ON_UTCL2_CREDITS__GFX101 = 0x0000000a, - UTCL1_PERF_SEL_STALL_MH_OFIFO_FULL__GFX101 = 0x0000000b, - UTCL1_PERF_SEL_STALL_MH_CAM_FULL__GFX101 = 0x0000000c, - UTCL1_PERF_SEL_NONRANGE_INV_REQS__GFX101 = 0x0000000d, - 
UTCL1_PERF_SEL_RANGE_INV_REQS__GFX101 = 0x0000000e, - UTCL1_PERF_SEL_SMALLK_PAGES__GFX103DERIVATIVE = 0x00000006, - UTCL1_PERF_SEL_BIGK_PAGES__GFX103DERIVATIVE = 0x00000007, - UTCL1_PERF_SEL_MH_RECENT_BUF_HIT__GFX103DERIVATIVE = 0x00000008, - UTCL1_PERF_SEL_MH_DUPLICATE_DETECT__GFX103DERIVATIVE = 0x00000009, - UTCL1_PERF_SEL_UTCL2_REQS__GFX103DERIVATIVE = 0x0000000a, - UTCL1_PERF_SEL_UTCL2_RET_PERM_FAULT__GFX103DERIVATIVE = 0x0000000b, - UTCL1_PERF_SEL_UTCL2_RET_PRT_FAULT__GFX103DERIVATIVE = 0x0000000c, - UTCL1_PERF_SEL_UTCL2_RET_XNACK_RETRY__GFX103DERIVATIVE = 0x0000000d, - UTCL1_PERF_SEL_UTCL2_RET_FAULT__GFX103DERIVATIVE = 0x0000000e, - UTCL1_PERF_SEL_STALL_UTCL2_CREDITS__GFX103DERIVATIVE = 0x0000000f, - UTCL1_PERF_SEL_STALL_MH_FULL__GFX103DERIVATIVE = 0x00000010, - UTCL1_PERF_SEL_CP_INVREQS__GFX103DERIVATIVE = 0x00000011, - UTCL1_PERF_SEL_UTCL2_UTCL1_INVREQS__GFX103DERIVATIVE = 0x00000012, - UTCL1_PERF_SEL_RANGE_INVREQS__GFX103DERIVATIVE = 0x00000013, - UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX103DERIVATIVE = 0x00000014, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - UTCL1_PERF_SEL_MH_RECENT_BUF_HIT__GFX11 = 0x00000004, - UTCL1_PERF_SEL_MH_DUPLICATE_DETECT__GFX11 = 0x00000005, - UTCL1_PERF_SEL_UTCL2_REQS__GFX11 = 0x00000006, - UTCL1_PERF_SEL_UTCL2_RET_XNACK_RETRY__GFX11 = 0x00000007, - UTCL1_PERF_SEL_UTCL2_RET_FAULT__GFX11 = 0x00000008, - UTCL1_PERF_SEL_STALL_UTCL2_CREDITS__GFX11 = 0x00000009, - UTCL1_PERF_SEL_STALL_MH_FULL__GFX11 = 0x0000000a, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__GFX11 = 0x0000000b, - UTCL1_PERF_SEL_UTCL2_RET_CNT__GFX11 = 0x0000000c, - UTCL1_PERF_SEL_RTNS__GFX11 = 0x0000000d, - UTCL1_PERF_SEL_XLAT_REQ_BUSY__GFX11 = 0x0000000e, - UTCL1_PERF_SEL_RANGE_INVREQS__GFX11 = 0x0000000f, - UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX11 = 0x00000010, - UTCL1_PERF_SEL_BYPASS_REQS__GFX11 = 0x00000011, - UTCL1_PERF_SEL_HIT_INV_FILTER_REQS__GFX11 = 0x00000012, - UTCL1_PERF_SEL_UTCL2_RET_PERM_FAULT__GFX11 = 0x00000013, - 
UTCL1_PERF_SEL_UTCL2_RET_PRT_FAULT__GFX11 = 0x00000014, - UTCL1_PERF_SEL_CP_INVREQS__GFX11 = 0x00000015, - UTCL1_PERF_SEL_UTCL2_UTCL1_INVREQS__GFX11 = 0x00000016, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_4K_64K__GFX11 = 0x00000017, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_64K_256K__GFX11 = 0x00000018, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_256K_512K__GFX11 = 0x00000019, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_512K_1M__GFX11 = 0x0000001a, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_1M_2M__GFX11 = 0x0000001b, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_2M_4M__GFX11 = 0x0000001c, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_4M_8M__GFX11 = 0x0000001d, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_8M_16M__GFX11 = 0x0000001e, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_16M_32M__GFX11 = 0x0000001f, - UTCL1_PERF_SEL_NUM_UTCL2_RTN_SIZE_32M_INF__GFX11 = 0x00000020, - UTCL1_PERF_SEL_UTCL2_REQ_SQUASHED_NUM__GFX11 = 0x00000021, - UTCL1_PERF_SEL_REQ_NUM_CACHE_CORE_0__GFX11 = 0x00000022, - UTCL1_PERF_SEL_REQ_NUM_CACHE_CORE_1__GFX11 = 0x00000023, - UTCL1_PERF_SEL_REQ_NUM_CACHE_CORE_2__GFX11 = 0x00000024, - UTCL1_PERF_SEL_REQ_NUM_CACHE_CORE_3__GFX11 = 0x00000025, - UTCL1_PERF_SEL_STALL_CYCLES_CACHE_CORE_0__GFX11 = 0x00000026, - UTCL1_PERF_SEL_STALL_CYCLES_CACHE_CORE_1__GFX11 = 0x00000027, - UTCL1_PERF_SEL_STALL_CYCLES_CACHE_CORE_2__GFX11 = 0x00000028, - UTCL1_PERF_SEL_STALL_CYCLES_CACHE_CORE_3__GFX11 = 0x00000029, - UTCL1_PERF_SEL_UTCL1_UTCL2_INVACKS__GFX11 = 0x0000002a, - UTCL1_PERF_SEL_UTCL0_UTCL1_INVACKS__GFX11 = 0x0000002b, - UTCL1_PERF_SEL_HITS_PG_SIZE_1__GFX11 = 0x0000002c, - UTCL1_PERF_SEL_HITS_PG_SIZE_2__GFX11 = 0x0000002d, - UTCL1_PERF_SEL_HITS_PG_SIZE_3__GFX11 = 0x0000002e, - UTCL1_PERF_SEL_HITS_PG_SIZE_4__GFX11 = 0x0000002f, - UTCL1_PERF_SEL_REQ_TO_MISS_HNDLR_0__GFX11 = 0x00000030, - UTCL1_PERF_SEL_REQ_TO_MISS_HNDLR_1__GFX11 = 0x00000031, - UTCL1_PERF_SEL_REQ_TO_MISS_HNDLR_2__GFX11 = 0x00000032, - UTCL1_PERF_SEL_REQ_TO_MISS_HNDLR_3__GFX11 = 0x00000033, - UTCL1_PERF_SEL_AVG_INV_LATENCY__GFX11 = 0x00000034, - 
UTCL1_PERF_SEL_NUM_OF_CYCLES_RQ_EXISTS_TO_CC0__GFX11 = 0x00000035, - UTCL1_PERF_SEL_NUM_OF_CYCLES_RQ_EXISTS_TO_CC1__GFX11 = 0x00000036, - UTCL1_PERF_SEL_NUM_OF_CYCLES_RQ_EXISTS_TO_CC2__GFX11 = 0x00000037, - UTCL1_PERF_SEL_NUM_OF_CYCLES_RQ_EXISTS_TO_CC3__GFX11 = 0x00000038, - UTCL1_PERF_SEL_NUM_OF_CYCLES_W_COLLISION_CC0__GFX11 = 0x00000039, - UTCL1_PERF_SEL_NUM_OF_CYCLES_W_COLLISION_CC1__GFX11 = 0x0000003a, - UTCL1_PERF_SEL_NUM_OF_CYCLES_W_COLLISION_CC2__GFX11 = 0x0000003b, - UTCL1_PERF_SEL_NUM_OF_CYCLES_W_COLLISION_CC3__GFX11 = 0x0000003c, - UTCL1_PERF_SEL_ALOG_INTERRUPT__GFX11 = 0x0000003d, - UTCL1_PERF_SEL_ALOG_INTERRUPT_DROPPED__GFX11 = 0x0000003e, - UTCL1_PERF_SEL_ALOG_CACHE_REQ__GFX11 = 0x0000003f, - UTCL1_PERF_SEL_ALOG_CACHE_HIT__GFX11 = 0x00000040, - UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11 = 0x00000041, -#endif -#if CHIP_HDR_NAVI23 - UTCL1_PERF_SEL_SMALL_PAGE_HITS__NV23 = 0x00000015, - UTCL1_PERF_SEL_LARGE_PAGE_HITS__NV23 = 0x00000016, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV23 = 0x00000017, -#endif -#if CHIP_HDR_NAVI24 - UTCL1_PERF_SEL_SMALL_PAGE_HITS__NV24 = 0x00000015, - UTCL1_PERF_SEL_LARGE_PAGE_HITS__NV24 = 0x00000016, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV24 = 0x00000017, -#endif - UTCL1_PERF_SEL_SMALL_PAGE_HITS__RAPHAEL = 0x00000015, - UTCL1_PERF_SEL_LARGE_PAGE_HITS__RAPHAEL = 0x00000016, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__RAPHAEL = 0x00000017, - UTCL1_PERF_SEL_SMALL_PAGE_HITS__REMBRANDT = 0x00000015, - UTCL1_PERF_SEL_LARGE_PAGE_HITS__REMBRANDT = 0x00000016, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__REMBRANDT = 0x00000017, -} UTCL1PerfSel; - -constexpr unsigned int MaxUTCL1PerfSelGfx101 = UTCL1_PERF_SEL_RANGE_INV_REQS__GFX101; -#if CHIP_HDR_NAVI22 -constexpr unsigned int MaxUTCL1PerfSelNv22 = UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX103DERIVATIVE; -#endif -#if CHIP_HDR_NAVI21 -constexpr unsigned int MaxUTCL1PerfSelNv21 = UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX103DERIVATIVE; -#endif -constexpr unsigned int 
MaxUTCL1PerfSelRaphael = UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__RAPHAEL; -constexpr unsigned int MaxUTCL1PerfSelRembrandt = UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__REMBRANDT; -#if CHIP_HDR_NAVI24 -constexpr unsigned int MaxUTCL1PerfSelNv24 = UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV24; -#endif -#if CHIP_HDR_NAVI23 -constexpr unsigned int MaxUTCL1PerfSelNv23 = UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV23; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -constexpr unsigned int MaxUTCL1PerfSelGfx11 = UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11; -#endif - -typedef enum VGT_CACHE_INVALID_MODE { - VC_ONLY = 0x00000000, - TC_ONLY = 0x00000001, - VC_AND_TC = 0x00000002, -} VGT_CACHE_INVALID_MODE; - -typedef enum VGT_DETECT_ONE { - ENABLE_TF1_OPT = 0x00000000, - DISABLE_TF1_OPT = 0x00000001, -} VGT_DETECT_ONE; - -typedef enum VGT_DETECT_ZERO { - ENABLE_TF0_OPT = 0x00000000, - DISABLE_TF0_OPT = 0x00000001, -} VGT_DETECT_ZERO; - -typedef enum VGT_DIST_MODE { - NO_DIST = 0x00000000, - PATCHES = 0x00000001, - DONUTS = 0x00000002, - TRAPEZOIDS = 0x00000003, -} VGT_DIST_MODE; - -typedef enum VGT_DI_INDEX_SIZE { - DI_INDEX_SIZE_16_BIT = 0x00000000, - DI_INDEX_SIZE_32_BIT = 0x00000001, - DI_INDEX_SIZE_8_BIT = 0x00000002, -} VGT_DI_INDEX_SIZE; - -typedef enum VGT_DI_MAJOR_MODE_SELECT { - DI_MAJOR_MODE_0 = 0x00000000, - DI_MAJOR_MODE_1 = 0x00000001, -} VGT_DI_MAJOR_MODE_SELECT; - -typedef enum VGT_DI_PRIM_TYPE { - DI_PT_NONE = 0x00000000, - DI_PT_POINTLIST = 0x00000001, - DI_PT_LINELIST = 0x00000002, - DI_PT_LINESTRIP = 0x00000003, - DI_PT_TRILIST = 0x00000004, - DI_PT_TRIFAN = 0x00000005, - DI_PT_TRISTRIP = 0x00000006, - DI_PT_2D_RECTANGLE = 0x00000007, - DI_PT_UNUSED_1 = 0x00000008, - DI_PT_PATCH = 0x00000009, - DI_PT_LINELIST_ADJ = 0x0000000a, - DI_PT_LINESTRIP_ADJ = 0x0000000b, - DI_PT_TRILIST_ADJ = 0x0000000c, - DI_PT_TRISTRIP_ADJ = 0x0000000d, - DI_PT_UNUSED_3 = 0x0000000e, - DI_PT_UNUSED_4 = 0x0000000f, - 
DI_PT_RECTLIST = 0x00000011, - DI_PT_LINELOOP = 0x00000012, - DI_PT_QUADLIST = 0x00000013, - DI_PT_QUADSTRIP = 0x00000014, - DI_PT_POLYGON = 0x00000015, - DI_PT_TRI_WITH_WFLAGS__GFX09 = 0x00000010, - DI_PT_UNUSED_5__GFX10PLUS = 0x00000010, -} VGT_DI_PRIM_TYPE; - -typedef enum VGT_DI_SOURCE_SELECT { - DI_SRC_SEL_DMA = 0x00000000, - DI_SRC_SEL_IMMEDIATE = 0x00000001, - DI_SRC_SEL_AUTO_INDEX = 0x00000002, - DI_SRC_SEL_RESERVED = 0x00000003, -} VGT_DI_SOURCE_SELECT; - -typedef enum VGT_DMA_BUF_TYPE { - VGT_DMA_BUF_MEM = 0x00000000, - VGT_DMA_BUF_RING = 0x00000001, - VGT_DMA_BUF_SETUP = 0x00000002, - VGT_DMA_PTR_UPDATE = 0x00000003, -} VGT_DMA_BUF_TYPE; - -typedef enum VGT_DMA_SWAP_MODE { - VGT_DMA_SWAP_NONE = 0x00000000, - VGT_DMA_SWAP_16_BIT = 0x00000001, - VGT_DMA_SWAP_32_BIT = 0x00000002, - VGT_DMA_SWAP_WORD = 0x00000003, -} VGT_DMA_SWAP_MODE; - -typedef enum VGT_EVENT_TYPE { - Reserved_0x00 = 0x00000000, - SAMPLE_STREAMOUTSTATS1 = 0x00000001, - SAMPLE_STREAMOUTSTATS2 = 0x00000002, - SAMPLE_STREAMOUTSTATS3 = 0x00000003, - CACHE_FLUSH_TS = 0x00000004, - CONTEXT_DONE = 0x00000005, - CACHE_FLUSH = 0x00000006, - CS_PARTIAL_FLUSH = 0x00000007, - VGT_STREAMOUT_SYNC = 0x00000008, - VGT_STREAMOUT_RESET = 0x0000000a, - END_OF_PIPE_INCR_DE = 0x0000000b, - END_OF_PIPE_IB_END = 0x0000000c, - RST_PIX_CNT = 0x0000000d, - BREAK_BATCH = 0x0000000e, - VS_PARTIAL_FLUSH = 0x0000000f, - PS_PARTIAL_FLUSH = 0x00000010, - FLUSH_HS_OUTPUT = 0x00000011, - FLUSH_DFSM = 0x00000012, - RESET_TO_LOWEST_VGT = 0x00000013, - CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014, - CACHE_FLUSH_AND_INV_EVENT = 0x00000016, - PERFCOUNTER_START = 0x00000017, - PERFCOUNTER_STOP = 0x00000018, - PIPELINESTAT_START = 0x00000019, - PIPELINESTAT_STOP = 0x0000001a, - PERFCOUNTER_SAMPLE = 0x0000001b, - SAMPLE_PIPELINESTAT = 0x0000001e, - SO_VGTSTREAMOUT_FLUSH = 0x0000001f, - SAMPLE_STREAMOUTSTATS = 0x00000020, - RESET_VTX_CNT = 0x00000021, - BLOCK_CONTEXT_DONE = 0x00000022, - CS_CONTEXT_DONE = 0x00000023, - VGT_FLUSH = 
0x00000024, - TGID_ROLLOVER = 0x00000025, - SQ_NON_EVENT = 0x00000026, - SC_SEND_DB_VPZ = 0x00000027, - BOTTOM_OF_PIPE_TS = 0x00000028, - FLUSH_SX_TS = 0x00000029, - DB_CACHE_FLUSH_AND_INV = 0x0000002a, - FLUSH_AND_INV_DB_DATA_TS = 0x0000002b, - FLUSH_AND_INV_DB_META = 0x0000002c, - FLUSH_AND_INV_CB_DATA_TS = 0x0000002d, - FLUSH_AND_INV_CB_META = 0x0000002e, - CS_DONE = 0x0000002f, - PS_DONE = 0x00000030, - FLUSH_AND_INV_CB_PIXEL_DATA = 0x00000031, - SX_CB_RAT_ACK_REQUEST = 0x00000032, - THREAD_TRACE_START = 0x00000033, - THREAD_TRACE_STOP = 0x00000034, - THREAD_TRACE_MARKER = 0x00000035, - THREAD_TRACE_FINISH = 0x00000037, - PIXEL_PIPE_STAT_CONTROL = 0x00000038, - PIXEL_PIPE_STAT_DUMP = 0x00000039, - PIXEL_PIPE_STAT_RESET = 0x0000003a, - CONTEXT_SUSPEND = 0x0000003b, - OFFCHIP_HS_DEALLOC = 0x0000003c, - ENABLE_NGG_PIPELINE = 0x0000003d, - SET_FE_ID__GFX09 = 0x00000009, - Available_0x1c__GFX09 = 0x0000001c, - Available_0x1d__GFX09 = 0x0000001d, - THREAD_TRACE_FLUSH__GFX09 = 0x00000036, - Reserved_0x3f__GFX09 = 0x0000003f, - ZPASS_DONE__GFX09_10 = 0x00000015, - ENABLE_LEGACY_PIPELINE__GFX09_10 = 0x0000003e, - Reserved_0x09__GFX10PLUS = 0x00000009, - FLUSH_ES_OUTPUT__GFX10PLUS = 0x0000001c, - BIN_CONF_OVERRIDE_CHECK__GFX10PLUS = 0x0000001d, - THREAD_TRACE_DRAW__GFX10PLUS = 0x00000036, - DRAW_DONE__GFX10PLUS = 0x0000003f, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - WAIT_SYNC__GFX11 = 0x00000015, - ENABLE_PIPELINE_NOT_USED__GFX11 = 0x0000003e, -#endif -} VGT_EVENT_TYPE; - -typedef enum VGT_GROUP_CONV_SEL { - VGT_GRP_INDEX_16 = 0x00000000, - VGT_GRP_INDEX_32 = 0x00000001, - VGT_GRP_UINT_16 = 0x00000002, - VGT_GRP_UINT_32 = 0x00000003, - VGT_GRP_SINT_16 = 0x00000004, - VGT_GRP_SINT_32 = 0x00000005, - VGT_GRP_FLOAT_32 = 0x00000006, - VGT_GRP_AUTO_PRIM = 0x00000007, - VGT_GRP_FIX_1_23_TO_FLOAT = 0x00000008, -} VGT_GROUP_CONV_SEL; - -typedef enum VGT_GRP_PRIM_ORDER { - VGT_GRP_LIST = 0x00000000, - VGT_GRP_STRIP = 0x00000001, - VGT_GRP_FAN 
= 0x00000002, - VGT_GRP_LOOP = 0x00000003, - VGT_GRP_POLYGON = 0x00000004, -} VGT_GRP_PRIM_ORDER; - -typedef enum VGT_GRP_PRIM_TYPE { - VGT_GRP_3D_POINT = 0x00000000, - VGT_GRP_3D_LINE = 0x00000001, - VGT_GRP_3D_TRI = 0x00000002, - VGT_GRP_3D_RECT = 0x00000003, - VGT_GRP_3D_QUAD = 0x00000004, - VGT_GRP_2D_COPY_RECT_V0 = 0x00000005, - VGT_GRP_2D_COPY_RECT_V1 = 0x00000006, - VGT_GRP_2D_COPY_RECT_V2 = 0x00000007, - VGT_GRP_2D_COPY_RECT_V3 = 0x00000008, - VGT_GRP_2D_FILL_RECT = 0x00000009, - VGT_GRP_2D_LINE = 0x0000000a, - VGT_GRP_2D_TRI = 0x0000000b, - VGT_GRP_PRIM_INDEX_LINE = 0x0000000c, - VGT_GRP_PRIM_INDEX_TRI = 0x0000000d, - VGT_GRP_PRIM_INDEX_QUAD = 0x0000000e, - VGT_GRP_3D_LINE_ADJ = 0x0000000f, - VGT_GRP_3D_TRI_ADJ = 0x00000010, - VGT_GRP_3D_PATCH = 0x00000011, - VGT_GRP_2D_RECT = 0x00000012, -} VGT_GRP_PRIM_TYPE; - -typedef enum VGT_GS_CUT_MODE { - GS_CUT_1024__HASHWVS = 0x00000000, - GS_CUT_512__HASHWVS = 0x00000001, - GS_CUT_256__HASHWVS = 0x00000002, - GS_CUT_128__HASHWVS = 0x00000003, -} VGT_GS_CUT_MODE; - -typedef enum VGT_GS_MODE_TYPE { - GS_OFF = 0x00000000, - GS_SCENARIO_A = 0x00000001, - GS_SCENARIO_B = 0x00000002, - GS_SCENARIO_G = 0x00000003, - GS_SCENARIO_C = 0x00000004, - SPRITE_EN = 0x00000005, -} VGT_GS_MODE_TYPE; - -typedef enum VGT_GS_OUTPRIM_TYPE { - POINTLIST = 0x00000000, - LINESTRIP = 0x00000001, - TRISTRIP = 0x00000002, - RECTLIST__GFX09 = 0x00000003, - RECT_2D__GFX10PLUS = 0x00000003, - RECTLIST__GFX10PLUS = 0x00000004, -} VGT_GS_OUTPRIM_TYPE; - -typedef enum VGT_INDEX_TYPE_MODE { - VGT_INDEX_16 = 0x00000000, - VGT_INDEX_32 = 0x00000001, - VGT_INDEX_8 = 0x00000002, -} VGT_INDEX_TYPE_MODE; - -typedef enum VGT_OUTPATH_SELECT { - VGT_OUTPATH_VTX_REUSE = 0x00000000, - VGT_OUTPATH_TESS_EN__GFX09 = 0x00000001, - VGT_OUTPATH_PASSTHRU__GFX09 = 0x00000002, - VGT_OUTPATH_GS_BLOCK__GFX09 = 0x00000003, - VGT_OUTPATH_HS_BLOCK__GFX09 = 0x00000004, - VGT_OUTPATH_PRIM_GEN__GFX09 = 0x00000005, - VGT_OUTPATH_GS_BLOCK__GFX10PLUS = 0x00000001, - 
VGT_OUTPATH_HS_BLOCK__GFX10PLUS = 0x00000002, - VGT_OUTPATH_PRIM_GEN__GFX10PLUS = 0x00000003, - VGT_OUTPATH_TE_PRIM_GEN__GFX10PLUS = 0x00000004, - VGT_OUTPATH_TE_GS_BLOCK__GFX10PLUS = 0x00000005, - VGT_OUTPATH_TE_OUTPUT__GFX10PLUS = 0x00000006, -} VGT_OUTPATH_SELECT; - -typedef enum VGT_OUT_PRIM_TYPE { - VGT_OUT_POINT = 0x00000000, - VGT_OUT_LINE = 0x00000001, - VGT_OUT_TRI = 0x00000002, - VGT_OUT_RECT_V0__GFX09_10 = 0x00000003, - VGT_OUT_RECT_V1__GFX09_10 = 0x00000004, - VGT_OUT_RECT_V2__GFX09_10 = 0x00000005, - VGT_OUT_RECT_V3__GFX09_10 = 0x00000006, - VGT_OUT_2D_RECT__GFX09_10 = 0x00000007, - VGT_TE_QUAD__GFX09_10 = 0x00000008, - VGT_TE_PRIM_INDEX_LINE__GFX09_10 = 0x00000009, - VGT_TE_PRIM_INDEX_TRI__GFX09_10 = 0x0000000a, - VGT_TE_PRIM_INDEX_QUAD__GFX09_10 = 0x0000000b, - VGT_OUT_LINE_ADJ__GFX09_10 = 0x0000000c, - VGT_OUT_TRI_ADJ__GFX09_10 = 0x0000000d, - VGT_OUT_PATCH__GFX09_10 = 0x0000000e, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - VGT_OUT_2D_RECT__GFX11 = 0x00000003, - VGT_OUT_RECT_V0__GFX11 = 0x00000004, - VGT_OUT_DUMMY_1__GFX11 = 0x00000005, - VGT_OUT_DUMMY_2__GFX11 = 0x00000006, - VGT_OUT_DUMMY_3__GFX11 = 0x00000007, - VGT_OUT_PATCH__GFX11 = 0x00000008, - VGT_OUT_LINE_ADJ__GFX11 = 0x00000009, - VGT_OUT_TRI_ADJ__GFX11 = 0x0000000a, -#endif -} VGT_OUT_PRIM_TYPE; - -typedef enum VGT_PERFCOUNT_SELECT { - vgt_perf_UNUSED0 = 0x00000000, - vgt_perf_VGT_SPI_ESVERT_VALID = 0x00000001, - vgt_perf_VGT_SPI_ESVERT_EOV = 0x00000002, - vgt_perf_VGT_SPI_ESVERT_STALLED = 0x00000003, - vgt_perf_VGT_SPI_ESVERT_STARVED_BUSY = 0x00000004, - vgt_perf_VGT_SPI_ESVERT_STARVED_IDLE = 0x00000005, - vgt_perf_VGT_SPI_ESVERT_STATIC = 0x00000006, - vgt_perf_VGT_SPI_PRIMGEN_GS_PRIM_FIFO_STALL = 0x00000007, - vgt_perf_VGT_SPI_PRIMGEN_ES_VERT_FIFO_STALL = 0x00000008, - vgt_perf_VGT_SPI_GSPRIM_VALID = 0x00000009, - vgt_perf_VGT_SPI_GSPRIM_EOV = 0x0000000a, - vgt_perf_VGT_SPI_GSPRIM_CONT = 0x0000000b, - vgt_perf_VGT_SPI_PRIMGEN_SUBGRP_FIFO_STALL = 
0x0000000c, - vgt_perf_VGT_SPI_GSPRIM_STARVED_BUSY = 0x0000000d, - vgt_perf_VGT_SPI_GSPRIM_STARVED_IDLE = 0x0000000e, - vgt_perf_VGT_SPI_GSPRIM_STATIC = 0x0000000f, - vgt_perf_VGT_SPI_GSSUBGRP_EVENT_WINDOW_ACTIVE = 0x00000010, - vgt_perf_VGT_SPI_GSSUBGRP_IS_EVENT = 0x00000011, - vgt_perf_VGT_SPI_GSSUBGRP_SEND = 0x00000012, - vgt_perf_VGT_SPI_VSWAVE_EVENT_WINDOW_ACTIVE = 0x00000013, - vgt_perf_VGT_SPI_VSVERT_SEND = 0x00000014, - vgt_perf_VGT_SPI_VSVERT_EOV = 0x00000015, - vgt_perf_VGT_SPI_VSVERT_STALLED = 0x00000016, - vgt_perf_VGT_SPI_VSVERT_STARVED_BUSY = 0x00000017, - vgt_perf_VGT_SPI_VSVERT_STARVED_IDLE = 0x00000018, - vgt_perf_VGT_SPI_VSVERT_STATIC = 0x00000019, - vgt_perf_VGT_SPI_VSWAVE_IS_EVENT = 0x0000001a, - vgt_perf_VGT_SPI_VSWAVE_SEND = 0x0000001b, - vgt_perf_VGT_PA_EVENT_WINDOW_ACTIVE = 0x0000001c, - vgt_perf_VGT_PA_CLIPV_SEND = 0x0000001d, - vgt_perf_VGT_PA_CLIPV_FIRSTVERT = 0x0000001e, - vgt_perf_VGT_PA_CLIPV_STALLED = 0x0000001f, - vgt_perf_VGT_PA_CLIPV_STARVED_BUSY = 0x00000020, - vgt_perf_VGT_PA_CLIPV_STARVED_IDLE = 0x00000021, - vgt_perf_VGT_PA_CLIPV_STATIC = 0x00000022, - vgt_perf_VGT_PA_CLIPP_SEND = 0x00000023, - vgt_perf_VGT_PA_CLIPP_EOP = 0x00000024, - vgt_perf_VGT_PA_CLIPP_IS_EVENT = 0x00000025, - vgt_perf_VGT_PA_CLIPP_NULL_PRIM = 0x00000026, - vgt_perf_VGT_PA_CLIPP_NEW_VTX_VECT = 0x00000027, - vgt_perf_VGT_PA_CLIPP_STALLED = 0x00000028, - vgt_perf_VGT_PA_CLIPP_STARVED_BUSY = 0x00000029, - vgt_perf_VGT_PA_CLIPP_STARVED_IDLE = 0x0000002a, - vgt_perf_VGT_PA_CLIPP_STATIC = 0x0000002b, - vgt_perf_VGT_PA_CLIPS_SEND = 0x0000002c, - vgt_perf_VGT_PA_CLIPS_STALLED = 0x0000002d, - vgt_perf_VGT_PA_CLIPS_STARVED_BUSY = 0x0000002e, - vgt_perf_VGT_PA_CLIPS_STARVED_IDLE = 0x0000002f, - vgt_perf_VGT_PA_CLIPS_STATIC = 0x00000030, - vgt_perf_VSVERT_DS_SEND = 0x00000031, - vgt_perf_VSVERT_API_SEND = 0x00000032, - vgt_perf_hs_tif_stall = 0x00000033, - vgt_perf_hs_input_stall = 0x00000034, - vgt_perf_hs_interface_stall = 0x00000035, - vgt_perf_hs_tfm_stall = 
0x00000036, - vgt_perf_te11_starved_after_work = 0x00000037, - vgt_perf_gs_event_stall = 0x00000038, - vgt_perf_vgt_pa_clipp_send_not_event = 0x00000039, - vgt_perf_vgt_pa_clipp_valid_prim = 0x0000003a, - vgt_perf_REUSED_ES_INDICES = 0x0000003b, - vgt_perf_VS_CACHE_HITS = 0x0000003c, - vgt_perf_GS_CACHE_HITS = 0x0000003d, - vgt_perf_DS_CACHE_HITS = 0x0000003e, - vgt_perf_TOTAL_CACHE_HITS = 0x0000003f, - vgt_perf_vgt_busy = 0x00000040, - vgt_perf_vgt_gs_busy = 0x00000041, - vgt_perf_gsprim_stalled = 0x00000042, - vgt_perf_esvert_stalled_gs_tbl = 0x00000043, - vgt_perf_esvert_stalled_gs_event = 0x00000044, - vgt_perf_vgt_spi_primgen_wd_interface_stall = 0x00000045, - vgt_perf_UNUSED70 = 0x00000046, - vgt_perf_gsprim_stalled_gs_tbl = 0x00000047, - vgt_perf_gsprim_stalled_gs_event = 0x00000048, - vgt_perf_gsprim_stalled_esvert = 0x00000049, - vgt_perf_UNUSED74 = 0x0000004a, - vgt_perf_UNUSED75 = 0x0000004b, - vgt_perf_counters_avail_stalled = 0x0000004c, - vgt_perf_gs_rb_space_avail_stalled = 0x0000004d, - vgt_perf_gs_issue_rtr_stalled = 0x0000004e, - vgt_perf_gssubgrp_stalled = 0x0000004f, - vgt_perf_strmout_stalled = 0x00000050, - vgt_perf_UNUSED81 = 0x00000051, - vgt_perf_cm_stalled_by_gog = 0x00000052, - vgt_perf_cm_reading_stalled = 0x00000053, - vgt_perf_cm_stalled_by_gsfetch_done = 0x00000054, - vgt_perf_gog_vs_tbl_stalled = 0x00000055, - vgt_perf_gog_out_indx_stalled = 0x00000056, - vgt_perf_gog_out_prim_stalled = 0x00000057, - vgt_perf_waveid_stalled = 0x00000058, - vgt_perf_gog_busy = 0x00000059, - vgt_perf_REUSED_VS_INDICES = 0x0000005a, - vgt_perf_sclk_reg_vld_event = 0x0000005b, - vgt_perf_vs_conflicting_indices = 0x0000005c, - vgt_perf_sclk_core_vld_event = 0x0000005d, - vgt_perf_hsthdgrp_stalled = 0x0000005e, - vgt_perf_sclk_gs_vld_event = 0x0000005f, - vgt_perf_vgt_spi_lsvert_valid = 0x00000060, - vgt_perf_vgt_spi_lsvert_eov = 0x00000061, - vgt_perf_vgt_spi_lsvert_stalled = 0x00000062, - vgt_perf_vgt_spi_lsvert_starved_busy = 0x00000063, - 
vgt_perf_vgt_spi_lsvert_starved_idle = 0x00000064, - vgt_perf_vgt_spi_lsvert_static = 0x00000065, - vgt_perf_UNUSED102 = 0x00000066, - vgt_perf_UNUSED103 = 0x00000067, - vgt_perf_UNUSED104 = 0x00000068, - vgt_perf_vgt_spi_hsvert_valid = 0x00000069, - vgt_perf_vgt_spi_hsvert_eov = 0x0000006a, - vgt_perf_vgt_spi_hsvert_stalled = 0x0000006b, - vgt_perf_vgt_spi_hsvert_starved_busy = 0x0000006c, - vgt_perf_vgt_spi_hsvert_starved_idle = 0x0000006d, - vgt_perf_vgt_spi_hsvert_static = 0x0000006e, - vgt_perf_vgt_spi_hsthdgrp_event_window_active = 0x0000006f, - vgt_perf_vgt_spi_hsthdgrp_is_event = 0x00000070, - vgt_perf_vgt_spi_hsthdgrp_send = 0x00000071, - vgt_perf_ds_prims = 0x00000072, - vgt_perf_UNUSED115 = 0x00000073, - vgt_perf_UNUSED116 = 0x00000074, - vgt_perf_hs_thread_groups = 0x00000075, - vgt_perf_UNUSED118 = 0x00000076, - vgt_perf_vs_thread_groups = 0x00000077, - vgt_perf_UNUSED120 = 0x00000078, - vgt_perf_UNUSED121 = 0x00000079, - vgt_perf_UNUSED122 = 0x0000007a, - vgt_perf_gs_done_latency = 0x0000007b, - vgt_perf_vgt_hs_busy = 0x0000007c, - vgt_perf_vgt_te11_busy = 0x0000007d, - vgt_perf_UNUSED126 = 0x0000007e, - vgt_perf_hs_flush = 0x0000007f, - vgt_perf_UNUSED128 = 0x00000080, - vgt_perf_vgt_pa_clipp_eopg = 0x00000081, - vgt_perf_UNUSED130 = 0x00000082, - vgt_perf_UNUSED131 = 0x00000083, - vgt_perf_UNUSED132 = 0x00000084, - vgt_perf_gs_done = 0x00000085, - vgt_perf_vs_done = 0x00000086, - vgt_perf_gs_done_received = 0x00000087, - vgt_perf_UNUSED136 = 0x00000088, - vgt_perf_gs_ring_high_water_mark = 0x00000089, - vgt_perf_vs_table_high_water_mark = 0x0000008a, - vgt_perf_hs_tgs_active_high_water_mark = 0x0000008b, - vgt_perf_pa_clipp_dealloc = 0x0000008c, - vgt_perf_cut_mem_flush_stalled = 0x0000008d, - vgt_perf_vsvert_work_received = 0x0000008e, - vgt_perf_vgt_pa_clipp_starved_after_work = 0x0000008f, - vgt_perf_te11_con_starved_after_work = 0x00000090, - vgt_perf_te11_con_stalled = 0x00000091, - vgt_perf_vgt_spi_vsvert_valid = 0x00000092, - 
vgt_perf_sclk_te11_vld = 0x00000093, -} VGT_PERFCOUNT_SELECT; - -constexpr unsigned int MaxVgtPerfcountSelect = vgt_perf_sclk_te11_vld; - -typedef enum VGT_RDREQ_POLICY { - VGT_POLICY_LRU = 0x00000000, - VGT_POLICY_STREAM = 0x00000001, - VGT_POLICY_BYPASS__GFX10PLUS = 0x00000002, -} VGT_RDREQ_POLICY; - -typedef enum VGT_STAGES_ES_EN { - ES_STAGE_OFF = 0x00000000, - ES_STAGE_DS = 0x00000001, - ES_STAGE_REAL = 0x00000002, - RESERVED_ES = 0x00000003, -} VGT_STAGES_ES_EN; - -typedef enum VGT_STAGES_GS_EN { - GS_STAGE_OFF = 0x00000000, - GS_STAGE_ON = 0x00000001, -} VGT_STAGES_GS_EN; - -typedef enum VGT_STAGES_HS_EN { - HS_STAGE_OFF = 0x00000000, - HS_STAGE_ON = 0x00000001, -} VGT_STAGES_HS_EN; - -typedef enum VGT_STAGES_LS_EN { - LS_STAGE_OFF = 0x00000000, - LS_STAGE_ON = 0x00000001, - CS_STAGE_ON = 0x00000002, - RESERVED_LS = 0x00000003, -} VGT_STAGES_LS_EN; - -typedef enum VGT_STAGES_VS_EN { - VS_STAGE_REAL = 0x00000000, - VS_STAGE_DS = 0x00000001, - VS_STAGE_COPY_SHADER = 0x00000002, - RESERVED_VS = 0x00000003, -} VGT_STAGES_VS_EN; - -typedef enum VGT_TESS_PARTITION { - PART_INTEGER = 0x00000000, - PART_POW2 = 0x00000001, - PART_FRAC_ODD = 0x00000002, - PART_FRAC_EVEN = 0x00000003, -} VGT_TESS_PARTITION; - -typedef enum VGT_TESS_TOPOLOGY { - OUTPUT_POINT = 0x00000000, - OUTPUT_LINE = 0x00000001, - OUTPUT_TRIANGLE_CW = 0x00000002, - OUTPUT_TRIANGLE_CCW = 0x00000003, -} VGT_TESS_TOPOLOGY; - -typedef enum VGT_TESS_TYPE { - TESS_ISOLINE = 0x00000000, - TESS_TRIANGLE = 0x00000001, - TESS_QUAD = 0x00000002, -} VGT_TESS_TYPE; - -typedef enum VRSCombinerMode { - VRS_COMB_MODE_PASSTHRU = 0x00000000, - VRS_COMB_MODE_OVERRIDE = 0x00000001, - VRS_COMB_MODE_MIN = 0x00000002, - VRS_COMB_MODE_MAX = 0x00000003, - VRS_COMB_MODE_SATURATE = 0x00000004, -} VRSCombinerMode; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum VRSCombinerModeSC { - SC_VRS_COMB_MODE_PASSTHRU = 0x00000000, - SC_VRS_COMB_MODE_OVERRIDE = 0x00000001, - 
SC_VRS_COMB_MODE_MIN = 0x00000002, - SC_VRS_COMB_MODE_MAX = 0x00000003, - SC_VRS_COMB_MODE_SATURATE = 0x00000004, -} VRSCombinerModeSC; -#endif - -typedef enum VRSHtileEncoding { - VRS_HTILE_DISABLE = 0x00000000, - VRS_HTILE_2BIT_ENCODING = 0x00000001, - VRS_HTILE_4BIT_ENCODING = 0x00000002, -} VRSHtileEncoding; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef enum VRSrate { - VRS_SHADING_RATE_1X1 = 0x00000000, - VRS_SHADING_RATE_1X2 = 0x00000001, - VRS_SHADING_RATE_UNDEFINED0 = 0x00000002, - VRS_SHADING_RATE_UNDEFINED1 = 0x00000003, - VRS_SHADING_RATE_2X1 = 0x00000004, - VRS_SHADING_RATE_2X2 = 0x00000005, - VRS_SHADING_RATE_2X4 = 0x00000006, - VRS_SHADING_RATE_UNDEFINED2 = 0x00000007, - VRS_SHADING_RATE_UNDEFINED3 = 0x00000008, - VRS_SHADING_RATE_4X2 = 0x00000009, - VRS_SHADING_RATE_4X4 = 0x0000000a, - VRS_SHADING_RATE_UNDEFINED4 = 0x0000000b, - VRS_SHADING_RATE_16X_SSAA = 0x0000000c, - VRS_SHADING_RATE_8X_SSAA = 0x0000000d, - VRS_SHADING_RATE_4X_SSAA = 0x0000000e, - VRS_SHADING_RATE_2X_SSAA = 0x0000000f, -} VRSrate; -#endif - -typedef enum WD_IA_DRAW_REG_XFER { - WD_IA_DRAW_REG_XFER_IA_MULTI_VGT_PARAM = 0x00000000, - WD_IA_DRAW_REG_XFER_VGT_MULTI_PRIM_IB_RESET_EN = 0x00000001, - WD_IA_DRAW_REG_XFER_VGT_INSTANCE_BASE_ID = 0x00000002, - WD_IA_DRAW_REG_XFER_GE_CNTL__GFX10PLUS = 0x00000003, - WD_IA_DRAW_REG_XFER_GE_USER_VGPR_EN__GFX10PLUS = 0x00000004, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - WD_IA_DRAW_REG_XFER_FL_MS_WG_DIM__GFX11 = 0x00000005, - WD_IA_DRAW_REG_XFER_FL_MS_WG_DIM_1__GFX11 = 0x00000006, - WD_IA_DRAW_REG_XFER_FL_MS_TG_SIZE__GFX11 = 0x00000007, - WD_IA_DRAW_REG_XFER_FL_MS_EXP_ALLOC__GFX11 = 0x00000008, - WD_IA_DRAW_REG_XFER_VGT_GS_OUT_PRIM_TYPE__GFX11 = 0x00000009, -#endif -} WD_IA_DRAW_REG_XFER; - -typedef enum WD_IA_DRAW_SOURCE { - WD_IA_DRAW_SOURCE_DMA = 0x00000000, - WD_IA_DRAW_SOURCE_IMMD = 0x00000001, - WD_IA_DRAW_SOURCE_AUTO = 0x00000002, - WD_IA_DRAW_SOURCE_OPAQ = 
0x00000003, -} WD_IA_DRAW_SOURCE; - -typedef enum WD_IA_DRAW_TYPE { - WD_IA_DRAW_TYPE_DI_MM0 = 0x00000000, - WD_IA_DRAW_TYPE_REG_XFER = 0x00000001, - WD_IA_DRAW_TYPE_EVENT_INIT = 0x00000002, - WD_IA_DRAW_TYPE_EVENT_ADDR = 0x00000003, - WD_IA_DRAW_TYPE_MIN_INDX = 0x00000004, - WD_IA_DRAW_TYPE_MAX_INDX = 0x00000005, - WD_IA_DRAW_TYPE_INDX_OFF = 0x00000006, - WD_IA_DRAW_TYPE_IMM_DATA = 0x00000007, -} WD_IA_DRAW_TYPE; - -typedef enum WD_PERFCOUNT_SELECT { - wd_perf_rbiu_fifos_event_window_active = 0x00000000, - wd_perf_rbiu_dr_fifo_starved = 0x00000001, - wd_perf_rbiu_dr_fifo_stalled = 0x00000002, - wd_perf_rbiu_di_fifo_starved = 0x00000003, - wd_perf_rbiu_di_fifo_stalled = 0x00000004, - wd_perf_wd_busy = 0x00000005, - wd_perf_wd_sclk_reg_vld_event = 0x00000006, - wd_perf_wd_sclk_input_vld_event = 0x00000007, - wd_perf_wd_sclk_core_vld_event = 0x00000008, - wd_perf_WD_STALLED = 0x00000009, - wd_perf_inside_tf_bin_0 = 0x0000000a, - wd_perf_inside_tf_bin_1 = 0x0000000b, - wd_perf_inside_tf_bin_2 = 0x0000000c, - wd_perf_inside_tf_bin_3 = 0x0000000d, - wd_perf_inside_tf_bin_4 = 0x0000000e, - wd_perf_inside_tf_bin_5 = 0x0000000f, - wd_perf_inside_tf_bin_6 = 0x00000010, - wd_perf_inside_tf_bin_7 = 0x00000011, - wd_perf_inside_tf_bin_8 = 0x00000012, - wd_perf_tfreq_lat_bin_0 = 0x00000013, - wd_perf_tfreq_lat_bin_1 = 0x00000014, - wd_perf_tfreq_lat_bin_2 = 0x00000015, - wd_perf_tfreq_lat_bin_3 = 0x00000016, - wd_perf_tfreq_lat_bin_4 = 0x00000017, - wd_perf_tfreq_lat_bin_5 = 0x00000018, - wd_perf_tfreq_lat_bin_6 = 0x00000019, - wd_perf_tfreq_lat_bin_7 = 0x0000001a, - wd_starved_on_hs_done = 0x0000001b, - wd_perf_se0_hs_done_latency = 0x0000001c, - wd_perf_se1_hs_done_latency = 0x0000001d, - wd_perf_se2_hs_done_latency = 0x0000001e, - wd_perf_se3_hs_done_latency = 0x0000001f, - wd_perf_hs_done_se0 = 0x00000020, - wd_perf_hs_done_se1 = 0x00000021, - wd_perf_hs_done_se2 = 0x00000022, - wd_perf_hs_done_se3 = 0x00000023, - wd_perf_null_patches = 0x00000024, - wd_perf_sclk_te11_cld = 
0x00000025, - wd_perf_csb_lat_bin_0 = 0x00000026, - wd_perf_csb_lat_bin_1 = 0x00000027, - wd_perf_csb_lat_bin_2 = 0x00000028, - wd_perf_csb_lat_bin_3 = 0x00000029, - wd_perf_csb_lat_bin_4 = 0x0000002a, - wd_perf_csb_lat_bin_5 = 0x0000002b, - wd_perf_csb_lat_bin_6 = 0x0000002c, - wd_perf_csb_lat_bin_7 = 0x0000002d, - wd_perf_rm_stalled_pos_buf = 0x0000002e, - wd_perf_rm_stalled_param_buf = 0x0000002f, - wd_perf_rm_stalled_index_buf = 0x00000030, - wd_perf_rm_stalled_csb_buf = 0x00000031, - wd_perf_utcl1_translation_miss_event = 0x00000032, - wd_perf_utcl1_translation_hit_event = 0x00000033, - wd_perf_utcl1_stall_event = 0x00000034, - wd_perf_utcl1_retry_event = 0x00000035, - wd_perf_utcl1_consecutive_retry_event = 0x00000036, - wd_perf_utcl1_request_event_1 = 0x00000037, - wd_perf_utcl1_request_event_0 = 0x00000038, - wd_perf_utcl1_stall_utcl2_event = 0x00000039, -} WD_PERFCOUNT_SELECT; - -constexpr unsigned int MaxWdPerfcountSelect = wd_perf_utcl1_stall_utcl2_event; - -typedef enum WritePolicy { - CACHE_LRU_WR = 0x00000000, - CACHE_STREAM = 0x00000001, - CACHE_NOA_WR = 0x00000002, - CACHE_BYPASS = 0x00000003, -} WritePolicy; - -typedef enum ZFormat { - Z_INVALID = 0x00000000, - Z_16 = 0x00000001, - Z_24 = 0x00000002, - Z_32_FLOAT = 0x00000003, -} ZFormat; - -typedef enum ZLimitSumm { - FORCE_SUMM_OFF = 0x00000000, - FORCE_SUMM_MINZ = 0x00000001, - FORCE_SUMM_MAXZ = 0x00000002, - FORCE_SUMM_BOTH = 0x00000003, -} ZLimitSumm; - -typedef enum ZModeForce { - NO_FORCE = 0x00000000, - FORCE_EARLY_Z = 0x00000001, - FORCE_LATE_Z = 0x00000002, - FORCE_RE_Z = 0x00000003, -} ZModeForce; - -typedef enum ZOrder { - LATE_Z = 0x00000000, - EARLY_Z_THEN_LATE_Z = 0x00000001, - RE_Z = 0x00000002, - EARLY_Z_THEN_RE_Z = 0x00000003, -} ZOrder; - -typedef enum ZpassControl { - ZPASS_DISABLE = 0x00000000, - ZPASS_SAMPLES = 0x00000001, - ZPASS_PIXELS = 0x00000002, -} ZpassControl; - -typedef enum ZSamplePosition { - Z_SAMPLE_CENTER = 0x00000000, - Z_SAMPLE_CENTROID = 0x00000001, -} 
ZSamplePosition; - -constexpr unsigned int CONFIG_SPACE1_END = 0x00002bff; -constexpr unsigned int CONFIG_SPACE1_START = 0x00002000; -constexpr unsigned int CONFIG_SPACE2_END = 0x00009fff; -constexpr unsigned int CONFIG_SPACE2_START = 0x00003000; -constexpr unsigned int CONFIG_SPACE_END = 0x00009fff; -constexpr unsigned int CONFIG_SPACE_START = 0x00002000; -constexpr unsigned int CONTEXT_SPACE_START = 0x0000a000; -constexpr unsigned int CSDATA_ADDR_WIDTH = 0x00000007; -constexpr unsigned int CSDATA_DATA_WIDTH = 0x00000020; -constexpr unsigned int CSDATA_TYPE_WIDTH = 0x00000002; -constexpr unsigned int GB_TILING_CONFIG_MACROTABLE_SIZE = 0x00000010; -constexpr unsigned int GB_TILING_CONFIG_TABLE_SIZE = 0x00000020; -constexpr unsigned int GSTHREADID_SIZE = 0x00000002; -constexpr unsigned int INST_ID_ECC_INTERRUPT_MSG = 0xfffffff0; -constexpr unsigned int INST_ID_HOST_REG_TRAP_MSG = 0xfffffffe; -constexpr unsigned int INST_ID_HW_TRAP = 0xfffffff2; -constexpr unsigned int INST_ID_KILL_SEQ = 0xfffffff3; -constexpr unsigned int INST_ID_PRIV_START = 0x80000000; -constexpr unsigned int INST_ID_SPI_WREXEC = 0xfffffff4; -constexpr unsigned int INST_ID_TTRACE_NEW_PC_MSG = 0xfffffff1; -constexpr unsigned int IQ_DEQUEUE_RETRY = 0x00000004; -constexpr unsigned int IQ_INTR_TYPE_IB = 0x00000001; -constexpr unsigned int IQ_INTR_TYPE_MQD = 0x00000002; -constexpr unsigned int IQ_INTR_TYPE_PQ = 0x00000000; -constexpr unsigned int IQ_OFFLOAD_RETRY = 0x00000001; -constexpr unsigned int IQ_QUEUE_SLEEP = 0x00000000; -constexpr unsigned int IQ_SCH_WAVE_MSG = 0x00000002; -constexpr unsigned int PERSISTENT_SPACE_END = 0x00002fff; -constexpr unsigned int PERSISTENT_SPACE_START = 0x00002c00; -constexpr unsigned int SIMM16_WAITCNT_EXP_CNT_SIZE = 0x00000003; -constexpr unsigned int SQDEC_BEGIN = 0x00002300; -constexpr unsigned int SQDEC_END = 0x000023ff; -constexpr unsigned int SQGFXUDEC_BEGIN = 0x0000c330; -constexpr unsigned int SQGFXUDEC_END = 0x0000c380; -constexpr unsigned int 
SQIND_GLOBAL_REGS_OFFSET = 0x00000000; -constexpr unsigned int SQIND_GLOBAL_REGS_SIZE = 0x00000008; -constexpr unsigned int SQIND_LOCAL_REGS_OFFSET = 0x00000008; -constexpr unsigned int SQIND_LOCAL_REGS_SIZE = 0x00000008; -constexpr unsigned int SQIND_WAVE_SGPRS_OFFSET = 0x00000200; -constexpr unsigned int SQIND_WAVE_SGPRS_SIZE = 0x00000200; -constexpr unsigned int SQIND_WAVE_VGPRS_OFFSET = 0x00000400; -constexpr unsigned int SQPERFDDEC_BEGIN = 0x0000d1c0; -constexpr unsigned int SQPERFDDEC_END = 0x0000d240; -constexpr unsigned int SQPERFSDEC_BEGIN = 0x0000d9c0; -constexpr unsigned int SQPERFSDEC_END = 0x0000da40; -constexpr unsigned int SQPWRDEC_BEGIN = 0x0000f08c; -constexpr unsigned int SQPWRDEC_END = 0x0000f094; -constexpr unsigned int SQ_DISPATCHER_GFX_CNT_PER_RING = 0x00000008; -constexpr unsigned int SQ_DISPATCHER_GFX_MIN = 0x00000010; -constexpr unsigned int SQ_EDC_FUE_CNTL_LDS = 0x00000005; -constexpr unsigned int SQ_EDC_FUE_CNTL_SIMD0 = 0x00000000; -constexpr unsigned int SQ_EDC_FUE_CNTL_SIMD1 = 0x00000001; -constexpr unsigned int SQ_EDC_FUE_CNTL_SIMD2 = 0x00000002; -constexpr unsigned int SQ_EDC_FUE_CNTL_SIMD3 = 0x00000003; -constexpr unsigned int SQ_EDC_FUE_CNTL_SQ = 0x00000004; -constexpr unsigned int SQ_EDC_FUE_CNTL_TA = 0x00000007; -constexpr unsigned int SQ_EDC_FUE_CNTL_TCP = 0x00000008; -constexpr unsigned int SQ_EDC_FUE_CNTL_TD = 0x00000006; -constexpr unsigned int SQ_EX_MODE_EXCP_ADDR_WATCH0 = 0x00000007; -constexpr unsigned int SQ_EX_MODE_EXCP_DIV0 = 0x00000002; -constexpr unsigned int SQ_EX_MODE_EXCP_HI_ADDR_WATCH1 = 0x00000000; -constexpr unsigned int SQ_EX_MODE_EXCP_HI_ADDR_WATCH2 = 0x00000001; -constexpr unsigned int SQ_EX_MODE_EXCP_HI_ADDR_WATCH3 = 0x00000002; -constexpr unsigned int SQ_EX_MODE_EXCP_INEXACT = 0x00000005; -constexpr unsigned int SQ_EX_MODE_EXCP_INPUT_DENORM = 0x00000001; -constexpr unsigned int SQ_EX_MODE_EXCP_INT_DIV0 = 0x00000006; -constexpr unsigned int SQ_EX_MODE_EXCP_INVALID = 0x00000000; -constexpr unsigned int 
SQ_EX_MODE_EXCP_MEM_VIOL = 0x00000008; -constexpr unsigned int SQ_EX_MODE_EXCP_OVERFLOW = 0x00000003; -constexpr unsigned int SQ_EX_MODE_EXCP_UNDERFLOW = 0x00000004; -constexpr unsigned int SQ_EX_MODE_EXCP_VALU_BASE = 0x00000000; -constexpr unsigned int SQ_EX_MODE_EXCP_VALU_SIZE = 0x00000007; -constexpr unsigned int SQ_GFXDEC_BEGIN = 0x0000a000; -constexpr unsigned int SQ_GFXDEC_END = 0x0000c000; -constexpr unsigned int SQ_GFXDEC_STATE_ID_SHIFT = 0x0000000a; -constexpr unsigned int SQ_MAX_PGM_SGPRS = 0x00000068; -constexpr unsigned int SQ_MAX_PGM_VGPRS = 0x00000100; -constexpr unsigned int SQ_WAVE_TYPE_PS0 = 0x00000000; -constexpr unsigned int SRCID_NONSECURE_CP = 0x00000001; -constexpr unsigned int SRCID_NONSECURE_CP_RCIU = 0x00000001; -constexpr unsigned int SRCID_RLC = 0x00000000; -constexpr unsigned int SRCID_RLCV = 0x00000006; -constexpr unsigned int SRCID_SECURE_CP = 0x00000007; -constexpr unsigned int SRCID_SECURE_CP_RCIU = 0x00000007; -constexpr unsigned int UCONFIG_SPACE_END = 0x0000ffff; -constexpr unsigned int UCONFIG_SPACE_START = 0x0000c000; -constexpr unsigned int VMID_SZ = 0x00000004; - -#if CHIP_HDR_PHOENIX1 -namespace Apu11 -{ - constexpr unsigned int AL_REVISION_ID = 0x00000012; - constexpr unsigned int EMMC_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int EMMC_HC_REG_REVISION_ID = 0x00000000; - constexpr unsigned int ENHIOMEMAPERTURE_REVISION_ID = 0x00000000; - constexpr unsigned int ILA_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_DEVICE_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_HCLK_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_P2P_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_ESPI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_SDP_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int MMREG_REVISION_ID = 0x00000000; - 
constexpr unsigned int NUM_REQUESTORS = 0x00000005; - constexpr unsigned int REQID_ECHI2 = 0x00000004; - constexpr unsigned int REQID_EHCI = 0x00000001; - constexpr unsigned int REQID_OCHI2 = 0x00000003; - constexpr unsigned int REQID_OHCI = 0x00000000; - constexpr unsigned int REQID_SATA0 = 0x00000002; - constexpr unsigned int SDB_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SD_PCI_CFG_REVISION_ID = 0x00000000; -} // namespace Apu11 -#endif - -namespace Core -{ - constexpr unsigned int IQ_SEM_REARM = 0x00000003; - constexpr unsigned int SEM_ECC_ERROR = 0x00000000; - constexpr unsigned int SEM_TRANS_ERROR = 0x00000001; -} // namespace Core - -namespace Gfx09 -{ - constexpr unsigned int SIMM16_WAITCNT_EXP_CNT_START = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_LGKM_CNT_SIZE = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_LGKM_CNT_START = 0x00000008; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_HI_SIZE = 0x00000002; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_HI_START = 0x0000000e; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_SIZE = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_START = 0x00000000; - constexpr unsigned int SQIND_WAVE_HWREGS_OFFSET = 0x00000010; - constexpr unsigned int SQIND_WAVE_HWREGS_SIZE = 0x000001f0; - constexpr unsigned int SQIND_WAVE_VGPRS_SIZE = 0x00000100; - constexpr unsigned int SQ_ATTR0 = 0x00000000; - constexpr unsigned int SQ_BUFFER_ATOMIC_ADD = 0x00000042; - constexpr unsigned int SQ_BUFFER_ATOMIC_ADD_X2 = 0x00000062; - constexpr unsigned int SQ_BUFFER_ATOMIC_AND = 0x00000048; - constexpr unsigned int SQ_BUFFER_ATOMIC_AND_X2 = 0x00000068; - constexpr unsigned int SQ_BUFFER_ATOMIC_CMPSWAP = 0x00000041; - constexpr unsigned int SQ_BUFFER_ATOMIC_CMPSWAP_X2 = 0x00000061; - constexpr unsigned int SQ_BUFFER_ATOMIC_DEC = 0x0000004c; - constexpr unsigned int SQ_BUFFER_ATOMIC_DEC_X2 = 0x0000006c; - constexpr unsigned int SQ_BUFFER_ATOMIC_INC = 0x0000004b; - constexpr unsigned int 
SQ_BUFFER_ATOMIC_INC_X2 = 0x0000006b; - constexpr unsigned int SQ_BUFFER_ATOMIC_OR = 0x00000049; - constexpr unsigned int SQ_BUFFER_ATOMIC_OR_X2 = 0x00000069; - constexpr unsigned int SQ_BUFFER_ATOMIC_SMAX = 0x00000046; - constexpr unsigned int SQ_BUFFER_ATOMIC_SMAX_X2 = 0x00000066; - constexpr unsigned int SQ_BUFFER_ATOMIC_SMIN = 0x00000044; - constexpr unsigned int SQ_BUFFER_ATOMIC_SMIN_X2 = 0x00000064; - constexpr unsigned int SQ_BUFFER_ATOMIC_SUB = 0x00000043; - constexpr unsigned int SQ_BUFFER_ATOMIC_SUB_X2 = 0x00000063; - constexpr unsigned int SQ_BUFFER_ATOMIC_SWAP = 0x00000040; - constexpr unsigned int SQ_BUFFER_ATOMIC_SWAP_X2 = 0x00000060; - constexpr unsigned int SQ_BUFFER_ATOMIC_UMAX = 0x00000047; - constexpr unsigned int SQ_BUFFER_ATOMIC_UMAX_X2 = 0x00000067; - constexpr unsigned int SQ_BUFFER_ATOMIC_UMIN = 0x00000045; - constexpr unsigned int SQ_BUFFER_ATOMIC_UMIN_X2 = 0x00000065; - constexpr unsigned int SQ_BUFFER_ATOMIC_XOR = 0x0000004a; - constexpr unsigned int SQ_BUFFER_ATOMIC_XOR_X2 = 0x0000006a; - constexpr unsigned int SQ_BUFFER_LOAD_DWORD = 0x00000014; - constexpr unsigned int SQ_BUFFER_LOAD_DWORDX2 = 0x00000015; - constexpr unsigned int SQ_BUFFER_LOAD_DWORDX3 = 0x00000016; - constexpr unsigned int SQ_BUFFER_LOAD_DWORDX4 = 0x00000017; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_D16_HI_X = 0x00000026; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_D16_X = 0x00000008; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_D16_XY = 0x00000009; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_D16_XYZ = 0x0000000a; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_D16_XYZW = 0x0000000b; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_X = 0x00000000; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_XY = 0x00000001; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_XYZ = 0x00000002; - constexpr unsigned int SQ_BUFFER_LOAD_FORMAT_XYZW = 0x00000003; - constexpr unsigned int SQ_BUFFER_LOAD_SBYTE = 0x00000011; - constexpr unsigned int SQ_BUFFER_LOAD_SBYTE_D16 = 
0x00000022; - constexpr unsigned int SQ_BUFFER_LOAD_SBYTE_D16_HI = 0x00000023; - constexpr unsigned int SQ_BUFFER_LOAD_SHORT_D16 = 0x00000024; - constexpr unsigned int SQ_BUFFER_LOAD_SHORT_D16_HI = 0x00000025; - constexpr unsigned int SQ_BUFFER_LOAD_SSHORT = 0x00000013; - constexpr unsigned int SQ_BUFFER_LOAD_UBYTE = 0x00000010; - constexpr unsigned int SQ_BUFFER_LOAD_UBYTE_D16 = 0x00000020; - constexpr unsigned int SQ_BUFFER_LOAD_UBYTE_D16_HI = 0x00000021; - constexpr unsigned int SQ_BUFFER_LOAD_USHORT = 0x00000012; - constexpr unsigned int SQ_BUFFER_STORE_BYTE = 0x00000018; - constexpr unsigned int SQ_BUFFER_STORE_BYTE_D16_HI = 0x00000019; - constexpr unsigned int SQ_BUFFER_STORE_DWORD = 0x0000001c; - constexpr unsigned int SQ_BUFFER_STORE_DWORDX2 = 0x0000001d; - constexpr unsigned int SQ_BUFFER_STORE_DWORDX3 = 0x0000001e; - constexpr unsigned int SQ_BUFFER_STORE_DWORDX4 = 0x0000001f; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_D16_HI_X = 0x00000027; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_D16_X = 0x0000000c; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_D16_XY = 0x0000000d; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_D16_XYZ = 0x0000000e; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_D16_XYZW = 0x0000000f; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_X = 0x00000004; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_XY = 0x00000005; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_XYZ = 0x00000006; - constexpr unsigned int SQ_BUFFER_STORE_FORMAT_XYZW = 0x00000007; - constexpr unsigned int SQ_BUFFER_STORE_LDS_DWORD = 0x0000003d; - constexpr unsigned int SQ_BUFFER_STORE_SHORT = 0x0000001a; - constexpr unsigned int SQ_BUFFER_STORE_SHORT_D16_HI = 0x0000001b; - constexpr unsigned int SQ_BUFFER_WBINVL1 = 0x0000003e; - constexpr unsigned int SQ_BUFFER_WBINVL1_VOL = 0x0000003f; - constexpr unsigned int SQ_CHAN_W = 0x00000003; - constexpr unsigned int SQ_CHAN_X = 0x00000000; - constexpr unsigned int SQ_CHAN_Y = 0x00000001; - constexpr unsigned int 
SQ_CHAN_Z = 0x00000002; - constexpr unsigned int SQ_CNT1 = 0x00000000; - constexpr unsigned int SQ_CNT2 = 0x00000001; - constexpr unsigned int SQ_CNT3 = 0x00000002; - constexpr unsigned int SQ_CNT4 = 0x00000003; - constexpr unsigned int SQ_DPP_BOUND_OFF = 0x00000000; - constexpr unsigned int SQ_DPP_BOUND_ZERO = 0x00000001; - constexpr unsigned int SQ_DPP_QUAD_PERM = 0x00000000; - constexpr unsigned int SQ_DPP_ROW_BCAST15 = 0x00000142; - constexpr unsigned int SQ_DPP_ROW_BCAST31 = 0x00000143; - constexpr unsigned int SQ_DPP_ROW_HALF_MIRROR = 0x00000141; - constexpr unsigned int SQ_DPP_ROW_MIRROR = 0x00000140; - constexpr unsigned int SQ_DPP_ROW_RR1 = 0x00000121; - constexpr unsigned int SQ_DPP_ROW_RR2 = 0x00000122; - constexpr unsigned int SQ_DPP_ROW_RR3 = 0x00000123; - constexpr unsigned int SQ_DPP_ROW_RR4 = 0x00000124; - constexpr unsigned int SQ_DPP_ROW_RR5 = 0x00000125; - constexpr unsigned int SQ_DPP_ROW_RR6 = 0x00000126; - constexpr unsigned int SQ_DPP_ROW_RR7 = 0x00000127; - constexpr unsigned int SQ_DPP_ROW_RR8 = 0x00000128; - constexpr unsigned int SQ_DPP_ROW_RR9 = 0x00000129; - constexpr unsigned int SQ_DPP_ROW_RR10 = 0x0000012a; - constexpr unsigned int SQ_DPP_ROW_RR11 = 0x0000012b; - constexpr unsigned int SQ_DPP_ROW_RR12 = 0x0000012c; - constexpr unsigned int SQ_DPP_ROW_RR13 = 0x0000012d; - constexpr unsigned int SQ_DPP_ROW_RR14 = 0x0000012e; - constexpr unsigned int SQ_DPP_ROW_RR15 = 0x0000012f; - constexpr unsigned int SQ_DPP_ROW_SL1 = 0x00000101; - constexpr unsigned int SQ_DPP_ROW_SL2 = 0x00000102; - constexpr unsigned int SQ_DPP_ROW_SL3 = 0x00000103; - constexpr unsigned int SQ_DPP_ROW_SL4 = 0x00000104; - constexpr unsigned int SQ_DPP_ROW_SL5 = 0x00000105; - constexpr unsigned int SQ_DPP_ROW_SL6 = 0x00000106; - constexpr unsigned int SQ_DPP_ROW_SL7 = 0x00000107; - constexpr unsigned int SQ_DPP_ROW_SL8 = 0x00000108; - constexpr unsigned int SQ_DPP_ROW_SL9 = 0x00000109; - constexpr unsigned int SQ_DPP_ROW_SL10 = 0x0000010a; - constexpr unsigned int 
SQ_DPP_ROW_SL11 = 0x0000010b; - constexpr unsigned int SQ_DPP_ROW_SL12 = 0x0000010c; - constexpr unsigned int SQ_DPP_ROW_SL13 = 0x0000010d; - constexpr unsigned int SQ_DPP_ROW_SL14 = 0x0000010e; - constexpr unsigned int SQ_DPP_ROW_SL15 = 0x0000010f; - constexpr unsigned int SQ_DPP_ROW_SR1 = 0x00000111; - constexpr unsigned int SQ_DPP_ROW_SR2 = 0x00000112; - constexpr unsigned int SQ_DPP_ROW_SR3 = 0x00000113; - constexpr unsigned int SQ_DPP_ROW_SR4 = 0x00000114; - constexpr unsigned int SQ_DPP_ROW_SR5 = 0x00000115; - constexpr unsigned int SQ_DPP_ROW_SR6 = 0x00000116; - constexpr unsigned int SQ_DPP_ROW_SR7 = 0x00000117; - constexpr unsigned int SQ_DPP_ROW_SR8 = 0x00000118; - constexpr unsigned int SQ_DPP_ROW_SR9 = 0x00000119; - constexpr unsigned int SQ_DPP_ROW_SR10 = 0x0000011a; - constexpr unsigned int SQ_DPP_ROW_SR11 = 0x0000011b; - constexpr unsigned int SQ_DPP_ROW_SR12 = 0x0000011c; - constexpr unsigned int SQ_DPP_ROW_SR13 = 0x0000011d; - constexpr unsigned int SQ_DPP_ROW_SR14 = 0x0000011e; - constexpr unsigned int SQ_DPP_ROW_SR15 = 0x0000011f; - constexpr unsigned int SQ_DPP_WF_RL1 = 0x00000134; - constexpr unsigned int SQ_DPP_WF_RR1 = 0x0000013c; - constexpr unsigned int SQ_DPP_WF_SL1 = 0x00000130; - constexpr unsigned int SQ_DPP_WF_SR1 = 0x00000138; - constexpr unsigned int SQ_DS_ADD_F32 = 0x00000015; - constexpr unsigned int SQ_DS_ADD_RTN_F32 = 0x00000035; - constexpr unsigned int SQ_DS_ADD_RTN_U32 = 0x00000020; - constexpr unsigned int SQ_DS_ADD_RTN_U64 = 0x00000060; - constexpr unsigned int SQ_DS_ADD_SRC2_F32 = 0x00000095; - constexpr unsigned int SQ_DS_ADD_SRC2_U32 = 0x00000080; - constexpr unsigned int SQ_DS_ADD_SRC2_U64 = 0x000000c0; - constexpr unsigned int SQ_DS_ADD_U32 = 0x00000000; - constexpr unsigned int SQ_DS_ADD_U64 = 0x00000040; - constexpr unsigned int SQ_DS_AND_B32 = 0x00000009; - constexpr unsigned int SQ_DS_AND_B64 = 0x00000049; - constexpr unsigned int SQ_DS_AND_RTN_B32 = 0x00000029; - constexpr unsigned int SQ_DS_AND_RTN_B64 = 
0x00000069; - constexpr unsigned int SQ_DS_AND_SRC2_B32 = 0x00000089; - constexpr unsigned int SQ_DS_AND_SRC2_B64 = 0x000000c9; - constexpr unsigned int SQ_DS_APPEND = 0x000000be; - constexpr unsigned int SQ_DS_BPERMUTE_B32 = 0x0000003f; - constexpr unsigned int SQ_DS_CMPST_B32 = 0x00000010; - constexpr unsigned int SQ_DS_CMPST_B64 = 0x00000050; - constexpr unsigned int SQ_DS_CMPST_F32 = 0x00000011; - constexpr unsigned int SQ_DS_CMPST_F64 = 0x00000051; - constexpr unsigned int SQ_DS_CMPST_RTN_B32 = 0x00000030; - constexpr unsigned int SQ_DS_CMPST_RTN_B64 = 0x00000070; - constexpr unsigned int SQ_DS_CMPST_RTN_F32 = 0x00000031; - constexpr unsigned int SQ_DS_CMPST_RTN_F64 = 0x00000071; - constexpr unsigned int SQ_DS_CONDXCHG32_RTN_B64 = 0x0000007e; - constexpr unsigned int SQ_DS_CONDXCHG32_RTN_B128 = 0x000000fd; - constexpr unsigned int SQ_DS_CONSUME = 0x000000bd; - constexpr unsigned int SQ_DS_DEC_RTN_U32 = 0x00000024; - constexpr unsigned int SQ_DS_DEC_RTN_U64 = 0x00000064; - constexpr unsigned int SQ_DS_DEC_SRC2_U32 = 0x00000084; - constexpr unsigned int SQ_DS_DEC_SRC2_U64 = 0x000000c4; - constexpr unsigned int SQ_DS_DEC_U32 = 0x00000004; - constexpr unsigned int SQ_DS_DEC_U64 = 0x00000044; - constexpr unsigned int SQ_DS_GWS_BARRIER = 0x0000009d; - constexpr unsigned int SQ_DS_GWS_INIT = 0x00000099; - constexpr unsigned int SQ_DS_GWS_SEMA_BR = 0x0000009b; - constexpr unsigned int SQ_DS_GWS_SEMA_P = 0x0000009c; - constexpr unsigned int SQ_DS_GWS_SEMA_RELEASE_ALL = 0x00000098; - constexpr unsigned int SQ_DS_GWS_SEMA_V = 0x0000009a; - constexpr unsigned int SQ_DS_INC_RTN_U32 = 0x00000023; - constexpr unsigned int SQ_DS_INC_RTN_U64 = 0x00000063; - constexpr unsigned int SQ_DS_INC_SRC2_U32 = 0x00000083; - constexpr unsigned int SQ_DS_INC_SRC2_U64 = 0x000000c3; - constexpr unsigned int SQ_DS_INC_U32 = 0x00000003; - constexpr unsigned int SQ_DS_INC_U64 = 0x00000043; - constexpr unsigned int SQ_DS_MAX_F32 = 0x00000013; - constexpr unsigned int SQ_DS_MAX_F64 = 0x00000053; 
- constexpr unsigned int SQ_DS_MAX_I32 = 0x00000006; - constexpr unsigned int SQ_DS_MAX_I64 = 0x00000046; - constexpr unsigned int SQ_DS_MAX_RTN_F32 = 0x00000033; - constexpr unsigned int SQ_DS_MAX_RTN_F64 = 0x00000073; - constexpr unsigned int SQ_DS_MAX_RTN_I32 = 0x00000026; - constexpr unsigned int SQ_DS_MAX_RTN_I64 = 0x00000066; - constexpr unsigned int SQ_DS_MAX_RTN_U32 = 0x00000028; - constexpr unsigned int SQ_DS_MAX_RTN_U64 = 0x00000068; - constexpr unsigned int SQ_DS_MAX_SRC2_F32 = 0x00000093; - constexpr unsigned int SQ_DS_MAX_SRC2_F64 = 0x000000d3; - constexpr unsigned int SQ_DS_MAX_SRC2_I32 = 0x00000086; - constexpr unsigned int SQ_DS_MAX_SRC2_I64 = 0x000000c6; - constexpr unsigned int SQ_DS_MAX_SRC2_U32 = 0x00000088; - constexpr unsigned int SQ_DS_MAX_SRC2_U64 = 0x000000c8; - constexpr unsigned int SQ_DS_MAX_U32 = 0x00000008; - constexpr unsigned int SQ_DS_MAX_U64 = 0x00000048; - constexpr unsigned int SQ_DS_MIN_F32 = 0x00000012; - constexpr unsigned int SQ_DS_MIN_F64 = 0x00000052; - constexpr unsigned int SQ_DS_MIN_I32 = 0x00000005; - constexpr unsigned int SQ_DS_MIN_I64 = 0x00000045; - constexpr unsigned int SQ_DS_MIN_RTN_F32 = 0x00000032; - constexpr unsigned int SQ_DS_MIN_RTN_F64 = 0x00000072; - constexpr unsigned int SQ_DS_MIN_RTN_I32 = 0x00000025; - constexpr unsigned int SQ_DS_MIN_RTN_I64 = 0x00000065; - constexpr unsigned int SQ_DS_MIN_RTN_U32 = 0x00000027; - constexpr unsigned int SQ_DS_MIN_RTN_U64 = 0x00000067; - constexpr unsigned int SQ_DS_MIN_SRC2_F32 = 0x00000092; - constexpr unsigned int SQ_DS_MIN_SRC2_F64 = 0x000000d2; - constexpr unsigned int SQ_DS_MIN_SRC2_I32 = 0x00000085; - constexpr unsigned int SQ_DS_MIN_SRC2_I64 = 0x000000c5; - constexpr unsigned int SQ_DS_MIN_SRC2_U32 = 0x00000087; - constexpr unsigned int SQ_DS_MIN_SRC2_U64 = 0x000000c7; - constexpr unsigned int SQ_DS_MIN_U32 = 0x00000007; - constexpr unsigned int SQ_DS_MIN_U64 = 0x00000047; - constexpr unsigned int SQ_DS_MSKOR_B32 = 0x0000000c; - constexpr unsigned int 
SQ_DS_MSKOR_B64 = 0x0000004c; - constexpr unsigned int SQ_DS_MSKOR_RTN_B32 = 0x0000002c; - constexpr unsigned int SQ_DS_MSKOR_RTN_B64 = 0x0000006c; - constexpr unsigned int SQ_DS_NOP = 0x00000014; - constexpr unsigned int SQ_DS_ORDERED_COUNT = 0x000000bf; - constexpr unsigned int SQ_DS_OR_B32 = 0x0000000a; - constexpr unsigned int SQ_DS_OR_B64 = 0x0000004a; - constexpr unsigned int SQ_DS_OR_RTN_B32 = 0x0000002a; - constexpr unsigned int SQ_DS_OR_RTN_B64 = 0x0000006a; - constexpr unsigned int SQ_DS_OR_SRC2_B32 = 0x0000008a; - constexpr unsigned int SQ_DS_OR_SRC2_B64 = 0x000000ca; - constexpr unsigned int SQ_DS_PERMUTE_B32 = 0x0000003e; - constexpr unsigned int SQ_DS_READ2ST64_B32 = 0x00000038; - constexpr unsigned int SQ_DS_READ2ST64_B64 = 0x00000078; - constexpr unsigned int SQ_DS_READ2_B32 = 0x00000037; - constexpr unsigned int SQ_DS_READ2_B64 = 0x00000077; - constexpr unsigned int SQ_DS_READ_ADDTID_B32 = 0x000000b6; - constexpr unsigned int SQ_DS_READ_B32 = 0x00000036; - constexpr unsigned int SQ_DS_READ_B64 = 0x00000076; - constexpr unsigned int SQ_DS_READ_B96 = 0x000000fe; - constexpr unsigned int SQ_DS_READ_B128 = 0x000000ff; - constexpr unsigned int SQ_DS_READ_I8 = 0x00000039; - constexpr unsigned int SQ_DS_READ_I8_D16 = 0x00000058; - constexpr unsigned int SQ_DS_READ_I8_D16_HI = 0x00000059; - constexpr unsigned int SQ_DS_READ_I16 = 0x0000003b; - constexpr unsigned int SQ_DS_READ_U8 = 0x0000003a; - constexpr unsigned int SQ_DS_READ_U8_D16 = 0x00000056; - constexpr unsigned int SQ_DS_READ_U8_D16_HI = 0x00000057; - constexpr unsigned int SQ_DS_READ_U16 = 0x0000003c; - constexpr unsigned int SQ_DS_READ_U16_D16 = 0x0000005a; - constexpr unsigned int SQ_DS_READ_U16_D16_HI = 0x0000005b; - constexpr unsigned int SQ_DS_RSUB_RTN_U32 = 0x00000022; - constexpr unsigned int SQ_DS_RSUB_RTN_U64 = 0x00000062; - constexpr unsigned int SQ_DS_RSUB_SRC2_U32 = 0x00000082; - constexpr unsigned int SQ_DS_RSUB_SRC2_U64 = 0x000000c2; - constexpr unsigned int SQ_DS_RSUB_U32 = 
0x00000002; - constexpr unsigned int SQ_DS_RSUB_U64 = 0x00000042; - constexpr unsigned int SQ_DS_SUB_RTN_U32 = 0x00000021; - constexpr unsigned int SQ_DS_SUB_RTN_U64 = 0x00000061; - constexpr unsigned int SQ_DS_SUB_SRC2_U32 = 0x00000081; - constexpr unsigned int SQ_DS_SUB_SRC2_U64 = 0x000000c1; - constexpr unsigned int SQ_DS_SUB_U32 = 0x00000001; - constexpr unsigned int SQ_DS_SUB_U64 = 0x00000041; - constexpr unsigned int SQ_DS_SWIZZLE_B32 = 0x0000003d; - constexpr unsigned int SQ_DS_WRAP_RTN_B32 = 0x00000034; - constexpr unsigned int SQ_DS_WRITE2ST64_B32 = 0x0000000f; - constexpr unsigned int SQ_DS_WRITE2ST64_B64 = 0x0000004f; - constexpr unsigned int SQ_DS_WRITE2_B32 = 0x0000000e; - constexpr unsigned int SQ_DS_WRITE2_B64 = 0x0000004e; - constexpr unsigned int SQ_DS_WRITE_ADDTID_B32 = 0x0000001d; - constexpr unsigned int SQ_DS_WRITE_B8 = 0x0000001e; - constexpr unsigned int SQ_DS_WRITE_B8_D16_HI = 0x00000054; - constexpr unsigned int SQ_DS_WRITE_B16 = 0x0000001f; - constexpr unsigned int SQ_DS_WRITE_B16_D16_HI = 0x00000055; - constexpr unsigned int SQ_DS_WRITE_B32 = 0x0000000d; - constexpr unsigned int SQ_DS_WRITE_B64 = 0x0000004d; - constexpr unsigned int SQ_DS_WRITE_B96 = 0x000000de; - constexpr unsigned int SQ_DS_WRITE_B128 = 0x000000df; - constexpr unsigned int SQ_DS_WRITE_SRC2_B32 = 0x0000008d; - constexpr unsigned int SQ_DS_WRITE_SRC2_B64 = 0x000000cd; - constexpr unsigned int SQ_DS_WRXCHG2ST64_RTN_B32 = 0x0000002f; - constexpr unsigned int SQ_DS_WRXCHG2ST64_RTN_B64 = 0x0000006f; - constexpr unsigned int SQ_DS_WRXCHG2_RTN_B32 = 0x0000002e; - constexpr unsigned int SQ_DS_WRXCHG2_RTN_B64 = 0x0000006e; - constexpr unsigned int SQ_DS_WRXCHG_RTN_B32 = 0x0000002d; - constexpr unsigned int SQ_DS_WRXCHG_RTN_B64 = 0x0000006d; - constexpr unsigned int SQ_DS_XOR_B32 = 0x0000000b; - constexpr unsigned int SQ_DS_XOR_B64 = 0x0000004b; - constexpr unsigned int SQ_DS_XOR_RTN_B32 = 0x0000002b; - constexpr unsigned int SQ_DS_XOR_RTN_B64 = 0x0000006b; - constexpr unsigned 
int SQ_DS_XOR_SRC2_B32 = 0x0000008b; - constexpr unsigned int SQ_DS_XOR_SRC2_B64 = 0x000000cb; - constexpr unsigned int SQ_ENC_DS_BITS = 0xd8000000; - constexpr unsigned int SQ_ENC_DS_FIELD = 0x00000036; - constexpr unsigned int SQ_ENC_DS_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_EXP_BITS = 0xc4000000; - constexpr unsigned int SQ_ENC_EXP_FIELD = 0x00000031; - constexpr unsigned int SQ_ENC_EXP_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_FLAT_BITS = 0xdc000000; - constexpr unsigned int SQ_ENC_FLAT_FIELD = 0x00000037; - constexpr unsigned int SQ_ENC_FLAT_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_MIMG_BITS = 0xf0000000; - constexpr unsigned int SQ_ENC_MIMG_FIELD = 0x0000003c; - constexpr unsigned int SQ_ENC_MIMG_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_MTBUF_BITS = 0xe8000000; - constexpr unsigned int SQ_ENC_MTBUF_FIELD = 0x0000003a; - constexpr unsigned int SQ_ENC_MTBUF_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_MUBUF_BITS = 0xe0000000; - constexpr unsigned int SQ_ENC_MUBUF_FIELD = 0x00000038; - constexpr unsigned int SQ_ENC_MUBUF_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_SMEM_BITS = 0xc0000000; - constexpr unsigned int SQ_ENC_SMEM_FIELD = 0x00000030; - constexpr unsigned int SQ_ENC_SMEM_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_SOP1_BITS = 0xbe800000; - constexpr unsigned int SQ_ENC_SOP1_FIELD = 0x0000017d; - constexpr unsigned int SQ_ENC_SOP1_MASK = 0xff800000; - constexpr unsigned int SQ_ENC_SOP2_BITS = 0x80000000; - constexpr unsigned int SQ_ENC_SOP2_FIELD = 0x00000002; - constexpr unsigned int SQ_ENC_SOP2_MASK = 0xc0000000; - constexpr unsigned int SQ_ENC_SOPC_BITS = 0xbf000000; - constexpr unsigned int SQ_ENC_SOPC_FIELD = 0x0000017e; - constexpr unsigned int SQ_ENC_SOPC_MASK = 0xff800000; - constexpr unsigned int SQ_ENC_SOPK_BITS = 0xb0000000; - constexpr unsigned int SQ_ENC_SOPK_FIELD = 0x0000000b; - constexpr unsigned int SQ_ENC_SOPK_MASK = 0xf0000000; - constexpr unsigned int SQ_ENC_SOPP_BITS = 
0xbf800000; - constexpr unsigned int SQ_ENC_SOPP_FIELD = 0x0000017f; - constexpr unsigned int SQ_ENC_SOPP_MASK = 0xff800000; - constexpr unsigned int SQ_ENC_VINTRP_BITS = 0xd4000000; - constexpr unsigned int SQ_ENC_VINTRP_FIELD = 0x00000035; - constexpr unsigned int SQ_ENC_VINTRP_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_VOP1_BITS = 0x7e000000; - constexpr unsigned int SQ_ENC_VOP1_FIELD = 0x0000003f; - constexpr unsigned int SQ_ENC_VOP1_MASK = 0xfe000000; - constexpr unsigned int SQ_ENC_VOP2_BITS = 0x00000000; - constexpr unsigned int SQ_ENC_VOP2_FIELD = 0x00000000; - constexpr unsigned int SQ_ENC_VOP2_MASK = 0x80000000; - constexpr unsigned int SQ_ENC_VOP3P_BITS = 0xd3800000; - constexpr unsigned int SQ_ENC_VOP3P_FIELD = 0x000001a7; - constexpr unsigned int SQ_ENC_VOP3P_MASK = 0xff800000; - constexpr unsigned int SQ_ENC_VOP3_BITS = 0xd0000000; - constexpr unsigned int SQ_ENC_VOP3_FIELD = 0x00000034; - constexpr unsigned int SQ_ENC_VOP3_MASK = 0xfc000000; - constexpr unsigned int SQ_ENC_VOPC_BITS = 0x7c000000; - constexpr unsigned int SQ_ENC_VOPC_FIELD = 0x0000003e; - constexpr unsigned int SQ_ENC_VOPC_MASK = 0xfe000000; - constexpr unsigned int SQ_EQ = 0x00000002; - constexpr unsigned int SQ_EXEC_HI = 0x0000007f; - constexpr unsigned int SQ_EXEC_LO = 0x0000007e; - constexpr unsigned int SQ_EXP = 0x00000000; - constexpr unsigned int SQ_EXP_GDS0 = 0x00000018; - constexpr unsigned int SQ_EXP_MRT0 = 0x00000000; - constexpr unsigned int SQ_EXP_MRTZ = 0x00000008; - constexpr unsigned int SQ_EXP_NULL = 0x00000009; - constexpr unsigned int SQ_EXP_NUM_GDS = 0x00000005; - constexpr unsigned int SQ_EXP_NUM_MRT = 0x00000008; - constexpr unsigned int SQ_EXP_NUM_PARAM = 0x00000020; - constexpr unsigned int SQ_EXP_NUM_POS = 0x00000004; - constexpr unsigned int SQ_EXP_PARAM0 = 0x00000020; - constexpr unsigned int SQ_EXP_POS0 = 0x0000000c; - constexpr unsigned int SQ_F = 0x00000000; - constexpr unsigned int SQ_FLAT = 0x00000000; - constexpr unsigned int SQ_FLAT_ATOMIC_ADD 
= 0x00000042; - constexpr unsigned int SQ_FLAT_ATOMIC_ADD_X2 = 0x00000062; - constexpr unsigned int SQ_FLAT_ATOMIC_AND = 0x00000048; - constexpr unsigned int SQ_FLAT_ATOMIC_AND_X2 = 0x00000068; - constexpr unsigned int SQ_FLAT_ATOMIC_CMPSWAP = 0x00000041; - constexpr unsigned int SQ_FLAT_ATOMIC_CMPSWAP_X2 = 0x00000061; - constexpr unsigned int SQ_FLAT_ATOMIC_DEC = 0x0000004c; - constexpr unsigned int SQ_FLAT_ATOMIC_DEC_X2 = 0x0000006c; - constexpr unsigned int SQ_FLAT_ATOMIC_INC = 0x0000004b; - constexpr unsigned int SQ_FLAT_ATOMIC_INC_X2 = 0x0000006b; - constexpr unsigned int SQ_FLAT_ATOMIC_OR = 0x00000049; - constexpr unsigned int SQ_FLAT_ATOMIC_OR_X2 = 0x00000069; - constexpr unsigned int SQ_FLAT_ATOMIC_SMAX = 0x00000046; - constexpr unsigned int SQ_FLAT_ATOMIC_SMAX_X2 = 0x00000066; - constexpr unsigned int SQ_FLAT_ATOMIC_SMIN = 0x00000044; - constexpr unsigned int SQ_FLAT_ATOMIC_SMIN_X2 = 0x00000064; - constexpr unsigned int SQ_FLAT_ATOMIC_SUB = 0x00000043; - constexpr unsigned int SQ_FLAT_ATOMIC_SUB_X2 = 0x00000063; - constexpr unsigned int SQ_FLAT_ATOMIC_SWAP = 0x00000040; - constexpr unsigned int SQ_FLAT_ATOMIC_SWAP_X2 = 0x00000060; - constexpr unsigned int SQ_FLAT_ATOMIC_UMAX = 0x00000047; - constexpr unsigned int SQ_FLAT_ATOMIC_UMAX_X2 = 0x00000067; - constexpr unsigned int SQ_FLAT_ATOMIC_UMIN = 0x00000045; - constexpr unsigned int SQ_FLAT_ATOMIC_UMIN_X2 = 0x00000065; - constexpr unsigned int SQ_FLAT_ATOMIC_XOR = 0x0000004a; - constexpr unsigned int SQ_FLAT_ATOMIC_XOR_X2 = 0x0000006a; - constexpr unsigned int SQ_FLAT_LOAD_DWORD = 0x00000014; - constexpr unsigned int SQ_FLAT_LOAD_DWORDX2 = 0x00000015; - constexpr unsigned int SQ_FLAT_LOAD_DWORDX3 = 0x00000016; - constexpr unsigned int SQ_FLAT_LOAD_DWORDX4 = 0x00000017; - constexpr unsigned int SQ_FLAT_LOAD_SBYTE = 0x00000011; - constexpr unsigned int SQ_FLAT_LOAD_SBYTE_D16 = 0x00000022; - constexpr unsigned int SQ_FLAT_LOAD_SBYTE_D16_HI = 0x00000023; - constexpr unsigned int SQ_FLAT_LOAD_SHORT_D16 = 
0x00000024; - constexpr unsigned int SQ_FLAT_LOAD_SHORT_D16_HI = 0x00000025; - constexpr unsigned int SQ_FLAT_LOAD_SSHORT = 0x00000013; - constexpr unsigned int SQ_FLAT_LOAD_UBYTE = 0x00000010; - constexpr unsigned int SQ_FLAT_LOAD_UBYTE_D16 = 0x00000020; - constexpr unsigned int SQ_FLAT_LOAD_UBYTE_D16_HI = 0x00000021; - constexpr unsigned int SQ_FLAT_LOAD_USHORT = 0x00000012; - constexpr unsigned int SQ_FLAT_SCRATCH_HI = 0x00000067; - constexpr unsigned int SQ_FLAT_SCRATCH_LO = 0x00000066; - constexpr unsigned int SQ_FLAT_STORE_BYTE = 0x00000018; - constexpr unsigned int SQ_FLAT_STORE_BYTE_D16_HI = 0x00000019; - constexpr unsigned int SQ_FLAT_STORE_DWORD = 0x0000001c; - constexpr unsigned int SQ_FLAT_STORE_DWORDX2 = 0x0000001d; - constexpr unsigned int SQ_FLAT_STORE_DWORDX3 = 0x0000001e; - constexpr unsigned int SQ_FLAT_STORE_DWORDX4 = 0x0000001f; - constexpr unsigned int SQ_FLAT_STORE_SHORT = 0x0000001a; - constexpr unsigned int SQ_FLAT_STORE_SHORT_D16_HI = 0x0000001b; - constexpr unsigned int SQ_GE = 0x00000006; - constexpr unsigned int SQ_GLOBAL = 0x00000002; - constexpr unsigned int SQ_GLOBAL_ATOMIC_ADD = 0x00000042; - constexpr unsigned int SQ_GLOBAL_ATOMIC_ADD_X2 = 0x00000062; - constexpr unsigned int SQ_GLOBAL_ATOMIC_AND = 0x00000048; - constexpr unsigned int SQ_GLOBAL_ATOMIC_AND_X2 = 0x00000068; - constexpr unsigned int SQ_GLOBAL_ATOMIC_CMPSWAP = 0x00000041; - constexpr unsigned int SQ_GLOBAL_ATOMIC_CMPSWAP_X2 = 0x00000061; - constexpr unsigned int SQ_GLOBAL_ATOMIC_DEC = 0x0000004c; - constexpr unsigned int SQ_GLOBAL_ATOMIC_DEC_X2 = 0x0000006c; - constexpr unsigned int SQ_GLOBAL_ATOMIC_INC = 0x0000004b; - constexpr unsigned int SQ_GLOBAL_ATOMIC_INC_X2 = 0x0000006b; - constexpr unsigned int SQ_GLOBAL_ATOMIC_OR = 0x00000049; - constexpr unsigned int SQ_GLOBAL_ATOMIC_OR_X2 = 0x00000069; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SMAX = 0x00000046; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SMAX_X2 = 0x00000066; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SMIN = 
0x00000044; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SMIN_X2 = 0x00000064; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SUB = 0x00000043; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SUB_X2 = 0x00000063; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SWAP = 0x00000040; - constexpr unsigned int SQ_GLOBAL_ATOMIC_SWAP_X2 = 0x00000060; - constexpr unsigned int SQ_GLOBAL_ATOMIC_UMAX = 0x00000047; - constexpr unsigned int SQ_GLOBAL_ATOMIC_UMAX_X2 = 0x00000067; - constexpr unsigned int SQ_GLOBAL_ATOMIC_UMIN = 0x00000045; - constexpr unsigned int SQ_GLOBAL_ATOMIC_UMIN_X2 = 0x00000065; - constexpr unsigned int SQ_GLOBAL_ATOMIC_XOR = 0x0000004a; - constexpr unsigned int SQ_GLOBAL_ATOMIC_XOR_X2 = 0x0000006a; - constexpr unsigned int SQ_GLOBAL_LOAD_DWORD = 0x00000014; - constexpr unsigned int SQ_GLOBAL_LOAD_DWORDX2 = 0x00000015; - constexpr unsigned int SQ_GLOBAL_LOAD_DWORDX3 = 0x00000016; - constexpr unsigned int SQ_GLOBAL_LOAD_DWORDX4 = 0x00000017; - constexpr unsigned int SQ_GLOBAL_LOAD_SBYTE = 0x00000011; - constexpr unsigned int SQ_GLOBAL_LOAD_SBYTE_D16 = 0x00000022; - constexpr unsigned int SQ_GLOBAL_LOAD_SBYTE_D16_HI = 0x00000023; - constexpr unsigned int SQ_GLOBAL_LOAD_SHORT_D16 = 0x00000024; - constexpr unsigned int SQ_GLOBAL_LOAD_SHORT_D16_HI = 0x00000025; - constexpr unsigned int SQ_GLOBAL_LOAD_SSHORT = 0x00000013; - constexpr unsigned int SQ_GLOBAL_LOAD_UBYTE = 0x00000010; - constexpr unsigned int SQ_GLOBAL_LOAD_UBYTE_D16 = 0x00000020; - constexpr unsigned int SQ_GLOBAL_LOAD_UBYTE_D16_HI = 0x00000021; - constexpr unsigned int SQ_GLOBAL_LOAD_USHORT = 0x00000012; - constexpr unsigned int SQ_GLOBAL_STORE_BYTE = 0x00000018; - constexpr unsigned int SQ_GLOBAL_STORE_BYTE_D16_HI = 0x00000019; - constexpr unsigned int SQ_GLOBAL_STORE_DWORD = 0x0000001c; - constexpr unsigned int SQ_GLOBAL_STORE_DWORDX2 = 0x0000001d; - constexpr unsigned int SQ_GLOBAL_STORE_DWORDX3 = 0x0000001e; - constexpr unsigned int SQ_GLOBAL_STORE_DWORDX4 = 0x0000001f; - constexpr unsigned int 
SQ_GLOBAL_STORE_SHORT = 0x0000001a; - constexpr unsigned int SQ_GLOBAL_STORE_SHORT_D16_HI = 0x0000001b; - constexpr unsigned int SQ_GS_OP_CUT = 0x00000001; - constexpr unsigned int SQ_GS_OP_EMIT = 0x00000002; - constexpr unsigned int SQ_GS_OP_EMIT_CUT = 0x00000003; - constexpr unsigned int SQ_GS_OP_NOP = 0x00000000; - constexpr unsigned int SQ_GT = 0x00000004; - constexpr unsigned int SQ_HWREG_ID_SHIFT = 0x00000000; - constexpr unsigned int SQ_HWREG_ID_SIZE = 0x00000006; - constexpr unsigned int SQ_HWREG_OFFSET_SHIFT = 0x00000006; - constexpr unsigned int SQ_HWREG_OFFSET_SIZE = 0x00000005; - constexpr unsigned int SQ_HWREG_SIZE_SHIFT = 0x0000000b; - constexpr unsigned int SQ_HWREG_SIZE_SIZE = 0x00000005; - constexpr unsigned int SQ_HW_REG_FLUSH_IB = 0x0000000e; - constexpr unsigned int SQ_HW_REG_GPR_ALLOC = 0x00000005; - constexpr unsigned int SQ_HW_REG_HW_ID = 0x00000004; - constexpr unsigned int SQ_HW_REG_IB_DBG0 = 0x0000000c; - constexpr unsigned int SQ_HW_REG_IB_DBG1 = 0x0000000d; - constexpr unsigned int SQ_HW_REG_IB_STS = 0x00000007; - constexpr unsigned int SQ_HW_REG_INST_DW0 = 0x0000000a; - constexpr unsigned int SQ_HW_REG_INST_DW1 = 0x0000000b; - constexpr unsigned int SQ_HW_REG_LDS_ALLOC = 0x00000006; - constexpr unsigned int SQ_HW_REG_MODE = 0x00000001; - constexpr unsigned int SQ_HW_REG_PC_HI = 0x00000009; - constexpr unsigned int SQ_HW_REG_PC_LO = 0x00000008; - constexpr unsigned int SQ_HW_REG_SH_MEM_BASES = 0x0000000f; - constexpr unsigned int SQ_HW_REG_SQ_SHADER_TBA_HI = 0x00000011; - constexpr unsigned int SQ_HW_REG_SQ_SHADER_TBA_LO = 0x00000010; - constexpr unsigned int SQ_HW_REG_SQ_SHADER_TMA_HI = 0x00000013; - constexpr unsigned int SQ_HW_REG_SQ_SHADER_TMA_LO = 0x00000012; - constexpr unsigned int SQ_HW_REG_STATUS = 0x00000002; - constexpr unsigned int SQ_HW_REG_TRAPSTS = 0x00000003; - constexpr unsigned int SQ_IMAGE_ATOMIC_ADD = 0x00000012; - constexpr unsigned int SQ_IMAGE_ATOMIC_AND = 0x00000018; - constexpr unsigned int 
SQ_IMAGE_ATOMIC_CMPSWAP = 0x00000011; - constexpr unsigned int SQ_IMAGE_ATOMIC_DEC = 0x0000001c; - constexpr unsigned int SQ_IMAGE_ATOMIC_INC = 0x0000001b; - constexpr unsigned int SQ_IMAGE_ATOMIC_OR = 0x00000019; - constexpr unsigned int SQ_IMAGE_ATOMIC_SMAX = 0x00000016; - constexpr unsigned int SQ_IMAGE_ATOMIC_SMIN = 0x00000014; - constexpr unsigned int SQ_IMAGE_ATOMIC_SUB = 0x00000013; - constexpr unsigned int SQ_IMAGE_ATOMIC_SWAP = 0x00000010; - constexpr unsigned int SQ_IMAGE_ATOMIC_UMAX = 0x00000017; - constexpr unsigned int SQ_IMAGE_ATOMIC_UMIN = 0x00000015; - constexpr unsigned int SQ_IMAGE_ATOMIC_XOR = 0x0000001a; - constexpr unsigned int SQ_IMAGE_GATHER4 = 0x00000040; - constexpr unsigned int SQ_IMAGE_GATHER4H = 0x00000042; - constexpr unsigned int SQ_IMAGE_GATHER4H_PCK = 0x0000004a; - constexpr unsigned int SQ_IMAGE_GATHER4_B = 0x00000045; - constexpr unsigned int SQ_IMAGE_GATHER4_B_CL = 0x00000046; - constexpr unsigned int SQ_IMAGE_GATHER4_B_CL_O = 0x00000056; - constexpr unsigned int SQ_IMAGE_GATHER4_B_O = 0x00000055; - constexpr unsigned int SQ_IMAGE_GATHER4_C = 0x00000048; - constexpr unsigned int SQ_IMAGE_GATHER4_CL = 0x00000041; - constexpr unsigned int SQ_IMAGE_GATHER4_CL_O = 0x00000051; - constexpr unsigned int SQ_IMAGE_GATHER4_C_B = 0x0000004d; - constexpr unsigned int SQ_IMAGE_GATHER4_C_B_CL = 0x0000004e; - constexpr unsigned int SQ_IMAGE_GATHER4_C_B_CL_O = 0x0000005e; - constexpr unsigned int SQ_IMAGE_GATHER4_C_B_O = 0x0000005d; - constexpr unsigned int SQ_IMAGE_GATHER4_C_CL = 0x00000049; - constexpr unsigned int SQ_IMAGE_GATHER4_C_CL_O = 0x00000059; - constexpr unsigned int SQ_IMAGE_GATHER4_C_L = 0x0000004c; - constexpr unsigned int SQ_IMAGE_GATHER4_C_LZ = 0x0000004f; - constexpr unsigned int SQ_IMAGE_GATHER4_C_LZ_O = 0x0000005f; - constexpr unsigned int SQ_IMAGE_GATHER4_C_L_O = 0x0000005c; - constexpr unsigned int SQ_IMAGE_GATHER4_C_O = 0x00000058; - constexpr unsigned int SQ_IMAGE_GATHER4_L = 0x00000044; - constexpr unsigned int 
SQ_IMAGE_GATHER4_LZ = 0x00000047; - constexpr unsigned int SQ_IMAGE_GATHER4_LZ_O = 0x00000057; - constexpr unsigned int SQ_IMAGE_GATHER4_L_O = 0x00000054; - constexpr unsigned int SQ_IMAGE_GATHER4_O = 0x00000050; - constexpr unsigned int SQ_IMAGE_GATHER8H_PCK = 0x0000004b; - constexpr unsigned int SQ_IMAGE_GET_LOD = 0x00000060; - constexpr unsigned int SQ_IMAGE_GET_RESINFO = 0x0000000e; - constexpr unsigned int SQ_IMAGE_LOAD = 0x00000000; - constexpr unsigned int SQ_IMAGE_LOAD_MIP = 0x00000001; - constexpr unsigned int SQ_IMAGE_LOAD_MIP_PCK = 0x00000004; - constexpr unsigned int SQ_IMAGE_LOAD_MIP_PCK_SGN = 0x00000005; - constexpr unsigned int SQ_IMAGE_LOAD_PCK = 0x00000002; - constexpr unsigned int SQ_IMAGE_LOAD_PCK_SGN = 0x00000003; - constexpr unsigned int SQ_IMAGE_RSRC256 = 0x0000007e; - constexpr unsigned int SQ_IMAGE_SAMPLE = 0x00000020; - constexpr unsigned int SQ_IMAGE_SAMPLER = 0x0000007f; - constexpr unsigned int SQ_IMAGE_SAMPLE_B = 0x00000025; - constexpr unsigned int SQ_IMAGE_SAMPLE_B_CL = 0x00000026; - constexpr unsigned int SQ_IMAGE_SAMPLE_B_CL_O = 0x00000036; - constexpr unsigned int SQ_IMAGE_SAMPLE_B_O = 0x00000035; - constexpr unsigned int SQ_IMAGE_SAMPLE_C = 0x00000028; - constexpr unsigned int SQ_IMAGE_SAMPLE_CD = 0x00000068; - constexpr unsigned int SQ_IMAGE_SAMPLE_CD_CL = 0x00000069; - constexpr unsigned int SQ_IMAGE_SAMPLE_CD_CL_O = 0x0000006d; - constexpr unsigned int SQ_IMAGE_SAMPLE_CD_O = 0x0000006c; - constexpr unsigned int SQ_IMAGE_SAMPLE_CL = 0x00000021; - constexpr unsigned int SQ_IMAGE_SAMPLE_CL_O = 0x00000031; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_B = 0x0000002d; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_B_CL = 0x0000002e; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_B_CL_O = 0x0000003e; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_B_O = 0x0000003d; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_CD = 0x0000006a; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_CD_CL = 0x0000006b; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_CD_CL_O = 0x0000006f; 
- constexpr unsigned int SQ_IMAGE_SAMPLE_C_CD_O = 0x0000006e; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_CL = 0x00000029; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_CL_O = 0x00000039; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_D = 0x0000002a; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_D_CL = 0x0000002b; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_D_CL_O = 0x0000003b; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_D_O = 0x0000003a; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_L = 0x0000002c; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_LZ = 0x0000002f; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_LZ_O = 0x0000003f; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_L_O = 0x0000003c; - constexpr unsigned int SQ_IMAGE_SAMPLE_C_O = 0x00000038; - constexpr unsigned int SQ_IMAGE_SAMPLE_D = 0x00000022; - constexpr unsigned int SQ_IMAGE_SAMPLE_D_CL = 0x00000023; - constexpr unsigned int SQ_IMAGE_SAMPLE_D_CL_O = 0x00000033; - constexpr unsigned int SQ_IMAGE_SAMPLE_D_O = 0x00000032; - constexpr unsigned int SQ_IMAGE_SAMPLE_L = 0x00000024; - constexpr unsigned int SQ_IMAGE_SAMPLE_LZ = 0x00000027; - constexpr unsigned int SQ_IMAGE_SAMPLE_LZ_O = 0x00000037; - constexpr unsigned int SQ_IMAGE_SAMPLE_L_O = 0x00000034; - constexpr unsigned int SQ_IMAGE_SAMPLE_O = 0x00000030; - constexpr unsigned int SQ_IMAGE_STORE = 0x00000008; - constexpr unsigned int SQ_IMAGE_STORE_MIP = 0x00000009; - constexpr unsigned int SQ_IMAGE_STORE_MIP_PCK = 0x0000000b; - constexpr unsigned int SQ_IMAGE_STORE_PCK = 0x0000000a; - constexpr unsigned int SQ_L1 = 0x00000001; - constexpr unsigned int SQ_L2 = 0x00000002; - constexpr unsigned int SQ_L3 = 0x00000003; - constexpr unsigned int SQ_L4 = 0x00000004; - constexpr unsigned int SQ_L5 = 0x00000005; - constexpr unsigned int SQ_L6 = 0x00000006; - constexpr unsigned int SQ_L7 = 0x00000007; - constexpr unsigned int SQ_L8 = 0x00000008; - constexpr unsigned int SQ_L9 = 0x00000009; - constexpr unsigned int SQ_L10 = 0x0000000a; - constexpr unsigned int SQ_L11 = 0x0000000b; - 
constexpr unsigned int SQ_L12 = 0x0000000c; - constexpr unsigned int SQ_L13 = 0x0000000d; - constexpr unsigned int SQ_L14 = 0x0000000e; - constexpr unsigned int SQ_L15 = 0x0000000f; - constexpr unsigned int SQ_LE = 0x00000003; - constexpr unsigned int SQ_LG = 0x00000005; - constexpr unsigned int SQ_LT = 0x00000001; - constexpr unsigned int SQ_M0 = 0x0000007c; - constexpr unsigned int SQ_MSG_EARLY_PRIM_DEALLOC = 0x00000008; - constexpr unsigned int SQ_MSG_GET_DOORBELL = 0x0000000a; - constexpr unsigned int SQ_MSG_GS = 0x00000002; - constexpr unsigned int SQ_MSG_GS_ALLOC_REQ = 0x00000009; - constexpr unsigned int SQ_MSG_GS_DONE = 0x00000003; - constexpr unsigned int SQ_MSG_HALT_WAVES = 0x00000006; - constexpr unsigned int SQ_MSG_INTERRUPT = 0x00000001; - constexpr unsigned int SQ_MSG_ORDERED_PS_DONE = 0x00000007; - constexpr unsigned int SQ_MSG_SAVEWAVE = 0x00000004; - constexpr unsigned int SQ_MSG_STALL_WAVE_GEN = 0x00000005; - constexpr unsigned int SQ_MSG_SYSMSG = 0x0000000f; - constexpr unsigned int SQ_NE = 0x00000005; - constexpr unsigned int SQ_NEQ = 0x0000000d; - constexpr unsigned int SQ_NGE = 0x00000009; - constexpr unsigned int SQ_NGT = 0x0000000b; - constexpr unsigned int SQ_NLE = 0x0000000c; - constexpr unsigned int SQ_NLG = 0x0000000a; - constexpr unsigned int SQ_NLT = 0x0000000e; - constexpr unsigned int SQ_NUM_ATTR = 0x00000021; - constexpr unsigned int SQ_NUM_SGPR = 0x00000066; - constexpr unsigned int SQ_NUM_TTMP = 0x00000010; - constexpr unsigned int SQ_NUM_VGPR = 0x00000100; - constexpr unsigned int SQ_O = 0x00000007; - constexpr unsigned int SQ_OMOD_D2 = 0x00000003; - constexpr unsigned int SQ_OMOD_M2 = 0x00000001; - constexpr unsigned int SQ_OMOD_M4 = 0x00000002; - constexpr unsigned int SQ_OMOD_OFF = 0x00000000; - constexpr unsigned int SQ_PARAM_P0 = 0x00000002; - constexpr unsigned int SQ_PARAM_P10 = 0x00000000; - constexpr unsigned int SQ_PARAM_P20 = 0x00000001; - constexpr unsigned int SQ_R1 = 0x00000001; - constexpr unsigned int SQ_R2 = 
0x00000002; - constexpr unsigned int SQ_R3 = 0x00000003; - constexpr unsigned int SQ_R4 = 0x00000004; - constexpr unsigned int SQ_R5 = 0x00000005; - constexpr unsigned int SQ_R6 = 0x00000006; - constexpr unsigned int SQ_R7 = 0x00000007; - constexpr unsigned int SQ_R8 = 0x00000008; - constexpr unsigned int SQ_R9 = 0x00000009; - constexpr unsigned int SQ_R10 = 0x0000000a; - constexpr unsigned int SQ_R11 = 0x0000000b; - constexpr unsigned int SQ_R12 = 0x0000000c; - constexpr unsigned int SQ_R13 = 0x0000000d; - constexpr unsigned int SQ_R14 = 0x0000000e; - constexpr unsigned int SQ_R15 = 0x0000000f; - constexpr unsigned int SQ_SCRATCH = 0x00000001; - constexpr unsigned int SQ_SCRATCH_LOAD_DWORD = 0x00000014; - constexpr unsigned int SQ_SCRATCH_LOAD_DWORDX2 = 0x00000015; - constexpr unsigned int SQ_SCRATCH_LOAD_DWORDX3 = 0x00000016; - constexpr unsigned int SQ_SCRATCH_LOAD_DWORDX4 = 0x00000017; - constexpr unsigned int SQ_SCRATCH_LOAD_SBYTE = 0x00000011; - constexpr unsigned int SQ_SCRATCH_LOAD_SBYTE_D16 = 0x00000022; - constexpr unsigned int SQ_SCRATCH_LOAD_SBYTE_D16_HI = 0x00000023; - constexpr unsigned int SQ_SCRATCH_LOAD_SHORT_D16 = 0x00000024; - constexpr unsigned int SQ_SCRATCH_LOAD_SHORT_D16_HI = 0x00000025; - constexpr unsigned int SQ_SCRATCH_LOAD_SSHORT = 0x00000013; - constexpr unsigned int SQ_SCRATCH_LOAD_UBYTE = 0x00000010; - constexpr unsigned int SQ_SCRATCH_LOAD_UBYTE_D16 = 0x00000020; - constexpr unsigned int SQ_SCRATCH_LOAD_UBYTE_D16_HI = 0x00000021; - constexpr unsigned int SQ_SCRATCH_LOAD_USHORT = 0x00000012; - constexpr unsigned int SQ_SCRATCH_STORE_BYTE = 0x00000018; - constexpr unsigned int SQ_SCRATCH_STORE_BYTE_D16_HI = 0x00000019; - constexpr unsigned int SQ_SCRATCH_STORE_DWORD = 0x0000001c; - constexpr unsigned int SQ_SCRATCH_STORE_DWORDX2 = 0x0000001d; - constexpr unsigned int SQ_SCRATCH_STORE_DWORDX3 = 0x0000001e; - constexpr unsigned int SQ_SCRATCH_STORE_DWORDX4 = 0x0000001f; - constexpr unsigned int SQ_SCRATCH_STORE_SHORT = 0x0000001a; - 
constexpr unsigned int SQ_SCRATCH_STORE_SHORT_D16_HI = 0x0000001b; - constexpr unsigned int SQ_SDWA_BYTE_0 = 0x00000000; - constexpr unsigned int SQ_SDWA_BYTE_1 = 0x00000001; - constexpr unsigned int SQ_SDWA_BYTE_2 = 0x00000002; - constexpr unsigned int SQ_SDWA_BYTE_3 = 0x00000003; - constexpr unsigned int SQ_SDWA_DWORD = 0x00000006; - constexpr unsigned int SQ_SDWA_UNUSED_PAD = 0x00000000; - constexpr unsigned int SQ_SDWA_UNUSED_PRESERVE = 0x00000002; - constexpr unsigned int SQ_SDWA_UNUSED_SEXT = 0x00000001; - constexpr unsigned int SQ_SDWA_WORD_0 = 0x00000004; - constexpr unsigned int SQ_SDWA_WORD_1 = 0x00000005; - constexpr unsigned int SQ_SENDMSG_GSOP_SHIFT = 0x00000004; - constexpr unsigned int SQ_SENDMSG_GSOP_SIZE = 0x00000002; - constexpr unsigned int SQ_SENDMSG_MSG_SHIFT = 0x00000000; - constexpr unsigned int SQ_SENDMSG_MSG_SIZE = 0x00000004; - constexpr unsigned int SQ_SENDMSG_STREAMID_SHIFT = 0x00000008; - constexpr unsigned int SQ_SENDMSG_STREAMID_SIZE = 0x00000002; - constexpr unsigned int SQ_SENDMSG_SYSTEM_SHIFT = 0x00000004; - constexpr unsigned int SQ_SENDMSG_SYSTEM_SIZE = 0x00000003; - constexpr unsigned int SQ_SGPR0 = 0x00000000; - constexpr unsigned int SQ_SRC_0 = 0x00000080; - constexpr unsigned int SQ_SRC_0_5 = 0x000000f0; - constexpr unsigned int SQ_SRC_1 = 0x000000f2; - constexpr unsigned int SQ_SRC_1_INT = 0x00000081; - constexpr unsigned int SQ_SRC_2 = 0x000000f4; - constexpr unsigned int SQ_SRC_2_INT = 0x00000082; - constexpr unsigned int SQ_SRC_3_INT = 0x00000083; - constexpr unsigned int SQ_SRC_4 = 0x000000f6; - constexpr unsigned int SQ_SRC_4_INT = 0x00000084; - constexpr unsigned int SQ_SRC_5_INT = 0x00000085; - constexpr unsigned int SQ_SRC_6_INT = 0x00000086; - constexpr unsigned int SQ_SRC_7_INT = 0x00000087; - constexpr unsigned int SQ_SRC_8_INT = 0x00000088; - constexpr unsigned int SQ_SRC_9_INT = 0x00000089; - constexpr unsigned int SQ_SRC_10_INT = 0x0000008a; - constexpr unsigned int SQ_SRC_11_INT = 0x0000008b; - constexpr 
unsigned int SQ_SRC_12_INT = 0x0000008c; - constexpr unsigned int SQ_SRC_13_INT = 0x0000008d; - constexpr unsigned int SQ_SRC_14_INT = 0x0000008e; - constexpr unsigned int SQ_SRC_15_INT = 0x0000008f; - constexpr unsigned int SQ_SRC_16_INT = 0x00000090; - constexpr unsigned int SQ_SRC_17_INT = 0x00000091; - constexpr unsigned int SQ_SRC_18_INT = 0x00000092; - constexpr unsigned int SQ_SRC_19_INT = 0x00000093; - constexpr unsigned int SQ_SRC_20_INT = 0x00000094; - constexpr unsigned int SQ_SRC_21_INT = 0x00000095; - constexpr unsigned int SQ_SRC_22_INT = 0x00000096; - constexpr unsigned int SQ_SRC_23_INT = 0x00000097; - constexpr unsigned int SQ_SRC_24_INT = 0x00000098; - constexpr unsigned int SQ_SRC_25_INT = 0x00000099; - constexpr unsigned int SQ_SRC_26_INT = 0x0000009a; - constexpr unsigned int SQ_SRC_27_INT = 0x0000009b; - constexpr unsigned int SQ_SRC_28_INT = 0x0000009c; - constexpr unsigned int SQ_SRC_29_INT = 0x0000009d; - constexpr unsigned int SQ_SRC_30_INT = 0x0000009e; - constexpr unsigned int SQ_SRC_31_INT = 0x0000009f; - constexpr unsigned int SQ_SRC_32_INT = 0x000000a0; - constexpr unsigned int SQ_SRC_33_INT = 0x000000a1; - constexpr unsigned int SQ_SRC_34_INT = 0x000000a2; - constexpr unsigned int SQ_SRC_35_INT = 0x000000a3; - constexpr unsigned int SQ_SRC_36_INT = 0x000000a4; - constexpr unsigned int SQ_SRC_37_INT = 0x000000a5; - constexpr unsigned int SQ_SRC_38_INT = 0x000000a6; - constexpr unsigned int SQ_SRC_39_INT = 0x000000a7; - constexpr unsigned int SQ_SRC_40_INT = 0x000000a8; - constexpr unsigned int SQ_SRC_41_INT = 0x000000a9; - constexpr unsigned int SQ_SRC_42_INT = 0x000000aa; - constexpr unsigned int SQ_SRC_43_INT = 0x000000ab; - constexpr unsigned int SQ_SRC_44_INT = 0x000000ac; - constexpr unsigned int SQ_SRC_45_INT = 0x000000ad; - constexpr unsigned int SQ_SRC_46_INT = 0x000000ae; - constexpr unsigned int SQ_SRC_47_INT = 0x000000af; - constexpr unsigned int SQ_SRC_48_INT = 0x000000b0; - constexpr unsigned int SQ_SRC_49_INT = 
0x000000b1; - constexpr unsigned int SQ_SRC_50_INT = 0x000000b2; - constexpr unsigned int SQ_SRC_51_INT = 0x000000b3; - constexpr unsigned int SQ_SRC_52_INT = 0x000000b4; - constexpr unsigned int SQ_SRC_53_INT = 0x000000b5; - constexpr unsigned int SQ_SRC_54_INT = 0x000000b6; - constexpr unsigned int SQ_SRC_55_INT = 0x000000b7; - constexpr unsigned int SQ_SRC_56_INT = 0x000000b8; - constexpr unsigned int SQ_SRC_57_INT = 0x000000b9; - constexpr unsigned int SQ_SRC_58_INT = 0x000000ba; - constexpr unsigned int SQ_SRC_59_INT = 0x000000bb; - constexpr unsigned int SQ_SRC_60_INT = 0x000000bc; - constexpr unsigned int SQ_SRC_61_INT = 0x000000bd; - constexpr unsigned int SQ_SRC_62_INT = 0x000000be; - constexpr unsigned int SQ_SRC_63_INT = 0x000000bf; - constexpr unsigned int SQ_SRC_64_INT = 0x000000c0; - constexpr unsigned int SQ_SRC_DPP = 0x000000fa; - constexpr unsigned int SQ_SRC_EXECZ = 0x000000fc; - constexpr unsigned int SQ_SRC_INV_2PI = 0x000000f8; - constexpr unsigned int SQ_SRC_LDS_DIRECT = 0x000000fe; - constexpr unsigned int SQ_SRC_LITERAL = 0x000000ff; - constexpr unsigned int SQ_SRC_M_0_5 = 0x000000f1; - constexpr unsigned int SQ_SRC_M_1 = 0x000000f3; - constexpr unsigned int SQ_SRC_M_1_INT = 0x000000c1; - constexpr unsigned int SQ_SRC_M_2 = 0x000000f5; - constexpr unsigned int SQ_SRC_M_2_INT = 0x000000c2; - constexpr unsigned int SQ_SRC_M_3_INT = 0x000000c3; - constexpr unsigned int SQ_SRC_M_4 = 0x000000f7; - constexpr unsigned int SQ_SRC_M_4_INT = 0x000000c4; - constexpr unsigned int SQ_SRC_M_5_INT = 0x000000c5; - constexpr unsigned int SQ_SRC_M_6_INT = 0x000000c6; - constexpr unsigned int SQ_SRC_M_7_INT = 0x000000c7; - constexpr unsigned int SQ_SRC_M_8_INT = 0x000000c8; - constexpr unsigned int SQ_SRC_M_9_INT = 0x000000c9; - constexpr unsigned int SQ_SRC_M_10_INT = 0x000000ca; - constexpr unsigned int SQ_SRC_M_11_INT = 0x000000cb; - constexpr unsigned int SQ_SRC_M_12_INT = 0x000000cc; - constexpr unsigned int SQ_SRC_M_13_INT = 0x000000cd; - constexpr 
unsigned int SQ_SRC_M_14_INT = 0x000000ce; - constexpr unsigned int SQ_SRC_M_15_INT = 0x000000cf; - constexpr unsigned int SQ_SRC_M_16_INT = 0x000000d0; - constexpr unsigned int SQ_SRC_POPS_EXITING_WAVE_ID = 0x000000ef; - constexpr unsigned int SQ_SRC_PRIVATE_BASE = 0x000000ed; - constexpr unsigned int SQ_SRC_PRIVATE_LIMIT = 0x000000ee; - constexpr unsigned int SQ_SRC_SCC = 0x000000fd; - constexpr unsigned int SQ_SRC_SDWA = 0x000000f9; - constexpr unsigned int SQ_SRC_SHARED_BASE = 0x000000eb; - constexpr unsigned int SQ_SRC_SHARED_LIMIT = 0x000000ec; - constexpr unsigned int SQ_SRC_VCCZ = 0x000000fb; - constexpr unsigned int SQ_SRC_VGPR0 = 0x00000100; - constexpr unsigned int SQ_SRC_VGPR_BIT = 0x00000100; - constexpr unsigned int SQ_SYSMSG_OP_ECC_ERR_INTERRUPT = 0x00000001; - constexpr unsigned int SQ_SYSMSG_OP_HOST_TRAP_ACK = 0x00000003; - constexpr unsigned int SQ_SYSMSG_OP_ILLEGAL_INST_INTERRUPT = 0x00000005; - constexpr unsigned int SQ_SYSMSG_OP_MEMVIOL_INTERRUPT = 0x00000006; - constexpr unsigned int SQ_SYSMSG_OP_REG_RD = 0x00000002; - constexpr unsigned int SQ_SYSMSG_OP_TTRACE_PC = 0x00000004; - constexpr unsigned int SQ_S_ABSDIFF_I32 = 0x0000002a; - constexpr unsigned int SQ_S_ABS_I32 = 0x00000030; - constexpr unsigned int SQ_S_ADDC_U32 = 0x00000004; - constexpr unsigned int SQ_S_ADDK_I32 = 0x0000000e; - constexpr unsigned int SQ_S_ADD_I32 = 0x00000002; - constexpr unsigned int SQ_S_ADD_U32 = 0x00000000; - constexpr unsigned int SQ_S_ANDN1_SAVEEXEC_B64 = 0x00000033; - constexpr unsigned int SQ_S_ANDN1_WREXEC_B64 = 0x00000035; - constexpr unsigned int SQ_S_ANDN2_B32 = 0x00000012; - constexpr unsigned int SQ_S_ANDN2_B64 = 0x00000013; - constexpr unsigned int SQ_S_ANDN2_SAVEEXEC_B64 = 0x00000023; - constexpr unsigned int SQ_S_ANDN2_WREXEC_B64 = 0x00000036; - constexpr unsigned int SQ_S_AND_B32 = 0x0000000c; - constexpr unsigned int SQ_S_AND_B64 = 0x0000000d; - constexpr unsigned int SQ_S_AND_SAVEEXEC_B64 = 0x00000020; - constexpr unsigned int SQ_S_ASHR_I32 = 
0x00000020; - constexpr unsigned int SQ_S_ASHR_I64 = 0x00000021; - constexpr unsigned int SQ_S_ATC_PROBE = 0x00000026; - constexpr unsigned int SQ_S_ATC_PROBE_BUFFER = 0x00000027; - constexpr unsigned int SQ_S_ATOMIC_ADD = 0x00000082; - constexpr unsigned int SQ_S_ATOMIC_ADD_X2 = 0x000000a2; - constexpr unsigned int SQ_S_ATOMIC_AND = 0x00000088; - constexpr unsigned int SQ_S_ATOMIC_AND_X2 = 0x000000a8; - constexpr unsigned int SQ_S_ATOMIC_CMPSWAP = 0x00000081; - constexpr unsigned int SQ_S_ATOMIC_CMPSWAP_X2 = 0x000000a1; - constexpr unsigned int SQ_S_ATOMIC_DEC = 0x0000008c; - constexpr unsigned int SQ_S_ATOMIC_DEC_X2 = 0x000000ac; - constexpr unsigned int SQ_S_ATOMIC_INC = 0x0000008b; - constexpr unsigned int SQ_S_ATOMIC_INC_X2 = 0x000000ab; - constexpr unsigned int SQ_S_ATOMIC_OR = 0x00000089; - constexpr unsigned int SQ_S_ATOMIC_OR_X2 = 0x000000a9; - constexpr unsigned int SQ_S_ATOMIC_SMAX = 0x00000086; - constexpr unsigned int SQ_S_ATOMIC_SMAX_X2 = 0x000000a6; - constexpr unsigned int SQ_S_ATOMIC_SMIN = 0x00000084; - constexpr unsigned int SQ_S_ATOMIC_SMIN_X2 = 0x000000a4; - constexpr unsigned int SQ_S_ATOMIC_SUB = 0x00000083; - constexpr unsigned int SQ_S_ATOMIC_SUB_X2 = 0x000000a3; - constexpr unsigned int SQ_S_ATOMIC_SWAP = 0x00000080; - constexpr unsigned int SQ_S_ATOMIC_SWAP_X2 = 0x000000a0; - constexpr unsigned int SQ_S_ATOMIC_UMAX = 0x00000087; - constexpr unsigned int SQ_S_ATOMIC_UMAX_X2 = 0x000000a7; - constexpr unsigned int SQ_S_ATOMIC_UMIN = 0x00000085; - constexpr unsigned int SQ_S_ATOMIC_UMIN_X2 = 0x000000a5; - constexpr unsigned int SQ_S_ATOMIC_XOR = 0x0000008a; - constexpr unsigned int SQ_S_ATOMIC_XOR_X2 = 0x000000aa; - constexpr unsigned int SQ_S_BARRIER = 0x0000000a; - constexpr unsigned int SQ_S_BCNT0_I32_B32 = 0x0000000a; - constexpr unsigned int SQ_S_BCNT0_I32_B64 = 0x0000000b; - constexpr unsigned int SQ_S_BCNT1_I32_B32 = 0x0000000c; - constexpr unsigned int SQ_S_BCNT1_I32_B64 = 0x0000000d; - constexpr unsigned int SQ_S_BFE_I32 = 
0x00000026; - constexpr unsigned int SQ_S_BFE_I64 = 0x00000028; - constexpr unsigned int SQ_S_BFE_U32 = 0x00000025; - constexpr unsigned int SQ_S_BFE_U64 = 0x00000027; - constexpr unsigned int SQ_S_BFM_B32 = 0x00000022; - constexpr unsigned int SQ_S_BFM_B64 = 0x00000023; - constexpr unsigned int SQ_S_BITCMP0_B32 = 0x0000000c; - constexpr unsigned int SQ_S_BITCMP0_B64 = 0x0000000e; - constexpr unsigned int SQ_S_BITCMP1_B32 = 0x0000000d; - constexpr unsigned int SQ_S_BITCMP1_B64 = 0x0000000f; - constexpr unsigned int SQ_S_BITREPLICATE_B64_B32 = 0x00000037; - constexpr unsigned int SQ_S_BITSET0_B32 = 0x00000018; - constexpr unsigned int SQ_S_BITSET0_B64 = 0x00000019; - constexpr unsigned int SQ_S_BITSET1_B32 = 0x0000001a; - constexpr unsigned int SQ_S_BITSET1_B64 = 0x0000001b; - constexpr unsigned int SQ_S_BRANCH = 0x00000002; - constexpr unsigned int SQ_S_BREV_B32 = 0x00000008; - constexpr unsigned int SQ_S_BREV_B64 = 0x00000009; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_ADD = 0x00000042; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_ADD_X2 = 0x00000062; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_AND = 0x00000048; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_AND_X2 = 0x00000068; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_CMPSWAP = 0x00000041; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_CMPSWAP_X2 = 0x00000061; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_DEC = 0x0000004c; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_DEC_X2 = 0x0000006c; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_INC = 0x0000004b; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_INC_X2 = 0x0000006b; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_OR = 0x00000049; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_OR_X2 = 0x00000069; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SMAX = 0x00000046; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SMAX_X2 = 0x00000066; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SMIN = 0x00000044; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SMIN_X2 = 0x00000064; - constexpr unsigned int 
SQ_S_BUFFER_ATOMIC_SUB = 0x00000043; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SUB_X2 = 0x00000063; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SWAP = 0x00000040; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_SWAP_X2 = 0x00000060; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_UMAX = 0x00000047; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_UMAX_X2 = 0x00000067; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_UMIN = 0x00000045; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_UMIN_X2 = 0x00000065; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_XOR = 0x0000004a; - constexpr unsigned int SQ_S_BUFFER_ATOMIC_XOR_X2 = 0x0000006a; - constexpr unsigned int SQ_S_BUFFER_LOAD_DWORD = 0x00000008; - constexpr unsigned int SQ_S_BUFFER_LOAD_DWORDX2 = 0x00000009; - constexpr unsigned int SQ_S_BUFFER_LOAD_DWORDX4 = 0x0000000a; - constexpr unsigned int SQ_S_BUFFER_LOAD_DWORDX8 = 0x0000000b; - constexpr unsigned int SQ_S_BUFFER_LOAD_DWORDX16 = 0x0000000c; - constexpr unsigned int SQ_S_BUFFER_STORE_DWORD = 0x00000018; - constexpr unsigned int SQ_S_BUFFER_STORE_DWORDX2 = 0x00000019; - constexpr unsigned int SQ_S_BUFFER_STORE_DWORDX4 = 0x0000001a; - constexpr unsigned int SQ_S_CALL_B64 = 0x00000015; - constexpr unsigned int SQ_S_CBRANCH_CDBGSYS = 0x00000017; - constexpr unsigned int SQ_S_CBRANCH_CDBGSYS_AND_USER = 0x0000001a; - constexpr unsigned int SQ_S_CBRANCH_CDBGSYS_OR_USER = 0x00000019; - constexpr unsigned int SQ_S_CBRANCH_CDBGUSER = 0x00000018; - constexpr unsigned int SQ_S_CBRANCH_EXECNZ = 0x00000009; - constexpr unsigned int SQ_S_CBRANCH_EXECZ = 0x00000008; - constexpr unsigned int SQ_S_CBRANCH_G_FORK = 0x00000029; - constexpr unsigned int SQ_S_CBRANCH_I_FORK = 0x00000010; - constexpr unsigned int SQ_S_CBRANCH_JOIN = 0x0000002e; - constexpr unsigned int SQ_S_CBRANCH_SCC0 = 0x00000004; - constexpr unsigned int SQ_S_CBRANCH_SCC1 = 0x00000005; - constexpr unsigned int SQ_S_CBRANCH_VCCNZ = 0x00000007; - constexpr unsigned int SQ_S_CBRANCH_VCCZ = 0x00000006; - constexpr unsigned int SQ_S_CMOVK_I32 
= 0x00000001; - constexpr unsigned int SQ_S_CMOV_B32 = 0x00000002; - constexpr unsigned int SQ_S_CMOV_B64 = 0x00000003; - constexpr unsigned int SQ_S_CMPK_EQ_I32 = 0x00000002; - constexpr unsigned int SQ_S_CMPK_EQ_U32 = 0x00000008; - constexpr unsigned int SQ_S_CMPK_GE_I32 = 0x00000005; - constexpr unsigned int SQ_S_CMPK_GE_U32 = 0x0000000b; - constexpr unsigned int SQ_S_CMPK_GT_I32 = 0x00000004; - constexpr unsigned int SQ_S_CMPK_GT_U32 = 0x0000000a; - constexpr unsigned int SQ_S_CMPK_LE_I32 = 0x00000007; - constexpr unsigned int SQ_S_CMPK_LE_U32 = 0x0000000d; - constexpr unsigned int SQ_S_CMPK_LG_I32 = 0x00000003; - constexpr unsigned int SQ_S_CMPK_LG_U32 = 0x00000009; - constexpr unsigned int SQ_S_CMPK_LT_I32 = 0x00000006; - constexpr unsigned int SQ_S_CMPK_LT_U32 = 0x0000000c; - constexpr unsigned int SQ_S_CMP_EQ_I32 = 0x00000000; - constexpr unsigned int SQ_S_CMP_EQ_U32 = 0x00000006; - constexpr unsigned int SQ_S_CMP_EQ_U64 = 0x00000012; - constexpr unsigned int SQ_S_CMP_GE_I32 = 0x00000003; - constexpr unsigned int SQ_S_CMP_GE_U32 = 0x00000009; - constexpr unsigned int SQ_S_CMP_GT_I32 = 0x00000002; - constexpr unsigned int SQ_S_CMP_GT_U32 = 0x00000008; - constexpr unsigned int SQ_S_CMP_LE_I32 = 0x00000005; - constexpr unsigned int SQ_S_CMP_LE_U32 = 0x0000000b; - constexpr unsigned int SQ_S_CMP_LG_I32 = 0x00000001; - constexpr unsigned int SQ_S_CMP_LG_U32 = 0x00000007; - constexpr unsigned int SQ_S_CMP_LG_U64 = 0x00000013; - constexpr unsigned int SQ_S_CMP_LT_I32 = 0x00000004; - constexpr unsigned int SQ_S_CMP_LT_U32 = 0x0000000a; - constexpr unsigned int SQ_S_CSELECT_B32 = 0x0000000a; - constexpr unsigned int SQ_S_CSELECT_B64 = 0x0000000b; - constexpr unsigned int SQ_S_DCACHE_DISCARD = 0x00000028; - constexpr unsigned int SQ_S_DCACHE_DISCARD_X2 = 0x00000029; - constexpr unsigned int SQ_S_DCACHE_INV = 0x00000020; - constexpr unsigned int SQ_S_DCACHE_INV_VOL = 0x00000022; - constexpr unsigned int SQ_S_DCACHE_WB = 0x00000021; - constexpr unsigned int 
SQ_S_DCACHE_WB_VOL = 0x00000023; - constexpr unsigned int SQ_S_DECPERFLEVEL = 0x00000015; - constexpr unsigned int SQ_S_ENDPGM = 0x00000001; - constexpr unsigned int SQ_S_ENDPGM_ORDERED_PS_DONE = 0x0000001e; - constexpr unsigned int SQ_S_ENDPGM_SAVED = 0x0000001b; - constexpr unsigned int SQ_S_FF0_I32_B32 = 0x0000000e; - constexpr unsigned int SQ_S_FF0_I32_B64 = 0x0000000f; - constexpr unsigned int SQ_S_FF1_I32_B32 = 0x00000010; - constexpr unsigned int SQ_S_FF1_I32_B64 = 0x00000011; - constexpr unsigned int SQ_S_FLBIT_I32 = 0x00000014; - constexpr unsigned int SQ_S_FLBIT_I32_B32 = 0x00000012; - constexpr unsigned int SQ_S_FLBIT_I32_B64 = 0x00000013; - constexpr unsigned int SQ_S_FLBIT_I32_I64 = 0x00000015; - constexpr unsigned int SQ_S_GETPC_B64 = 0x0000001c; - constexpr unsigned int SQ_S_GETREG_B32 = 0x00000011; - constexpr unsigned int SQ_S_GETREG_REGRD_B32 = 0x00000013; - constexpr unsigned int SQ_S_ICACHE_INV = 0x00000013; - constexpr unsigned int SQ_S_INCPERFLEVEL = 0x00000014; - constexpr unsigned int SQ_S_LOAD_DWORD = 0x00000000; - constexpr unsigned int SQ_S_LOAD_DWORDX2 = 0x00000001; - constexpr unsigned int SQ_S_LOAD_DWORDX4 = 0x00000002; - constexpr unsigned int SQ_S_LOAD_DWORDX8 = 0x00000003; - constexpr unsigned int SQ_S_LOAD_DWORDX16 = 0x00000004; - constexpr unsigned int SQ_S_LSHL1_ADD_U32 = 0x0000002e; - constexpr unsigned int SQ_S_LSHL2_ADD_U32 = 0x0000002f; - constexpr unsigned int SQ_S_LSHL3_ADD_U32 = 0x00000030; - constexpr unsigned int SQ_S_LSHL4_ADD_U32 = 0x00000031; - constexpr unsigned int SQ_S_LSHL_B32 = 0x0000001c; - constexpr unsigned int SQ_S_LSHL_B64 = 0x0000001d; - constexpr unsigned int SQ_S_LSHR_B32 = 0x0000001e; - constexpr unsigned int SQ_S_LSHR_B64 = 0x0000001f; - constexpr unsigned int SQ_S_MAX_I32 = 0x00000008; - constexpr unsigned int SQ_S_MAX_U32 = 0x00000009; - constexpr unsigned int SQ_S_MEMREALTIME = 0x00000025; - constexpr unsigned int SQ_S_MEMTIME = 0x00000024; - constexpr unsigned int SQ_S_MIN_I32 = 0x00000006; - 
constexpr unsigned int SQ_S_MIN_U32 = 0x00000007; - constexpr unsigned int SQ_S_MOVK_I32 = 0x00000000; - constexpr unsigned int SQ_S_MOVRELD_B32 = 0x0000002c; - constexpr unsigned int SQ_S_MOVRELD_B64 = 0x0000002d; - constexpr unsigned int SQ_S_MOVRELS_B32 = 0x0000002a; - constexpr unsigned int SQ_S_MOVRELS_B64 = 0x0000002b; - constexpr unsigned int SQ_S_MOV_B32 = 0x00000000; - constexpr unsigned int SQ_S_MOV_B64 = 0x00000001; - constexpr unsigned int SQ_S_MOV_FED_B32 = 0x00000031; - constexpr unsigned int SQ_S_MOV_REGRD_B32 = 0x0000002f; - constexpr unsigned int SQ_S_MULK_I32 = 0x0000000f; - constexpr unsigned int SQ_S_MUL_HI_I32 = 0x0000002d; - constexpr unsigned int SQ_S_MUL_HI_U32 = 0x0000002c; - constexpr unsigned int SQ_S_MUL_I32 = 0x00000024; - constexpr unsigned int SQ_S_NAND_B32 = 0x00000016; - constexpr unsigned int SQ_S_NAND_B64 = 0x00000017; - constexpr unsigned int SQ_S_NAND_SAVEEXEC_B64 = 0x00000025; - constexpr unsigned int SQ_S_NOP = 0x00000000; - constexpr unsigned int SQ_S_NOR_B32 = 0x00000018; - constexpr unsigned int SQ_S_NOR_B64 = 0x00000019; - constexpr unsigned int SQ_S_NOR_SAVEEXEC_B64 = 0x00000026; - constexpr unsigned int SQ_S_NOT_B32 = 0x00000004; - constexpr unsigned int SQ_S_NOT_B64 = 0x00000005; - constexpr unsigned int SQ_S_ORN1_SAVEEXEC_B64 = 0x00000034; - constexpr unsigned int SQ_S_ORN2_B32 = 0x00000014; - constexpr unsigned int SQ_S_ORN2_B64 = 0x00000015; - constexpr unsigned int SQ_S_ORN2_SAVEEXEC_B64 = 0x00000024; - constexpr unsigned int SQ_S_OR_B32 = 0x0000000e; - constexpr unsigned int SQ_S_OR_B64 = 0x0000000f; - constexpr unsigned int SQ_S_OR_SAVEEXEC_B64 = 0x00000021; - constexpr unsigned int SQ_S_PACK_HH_B32_B16 = 0x00000034; - constexpr unsigned int SQ_S_PACK_LH_B32_B16 = 0x00000033; - constexpr unsigned int SQ_S_PACK_LL_B32_B16 = 0x00000032; - constexpr unsigned int SQ_S_QUADMASK_B32 = 0x00000028; - constexpr unsigned int SQ_S_QUADMASK_B64 = 0x00000029; - constexpr unsigned int SQ_S_RFE_B64 = 0x0000001f; - constexpr 
unsigned int SQ_S_RFE_RESTORE_B64 = 0x0000002b; - constexpr unsigned int SQ_S_SCRATCH_LOAD_DWORD = 0x00000005; - constexpr unsigned int SQ_S_SCRATCH_LOAD_DWORDX2 = 0x00000006; - constexpr unsigned int SQ_S_SCRATCH_LOAD_DWORDX4 = 0x00000007; - constexpr unsigned int SQ_S_SCRATCH_STORE_DWORD = 0x00000015; - constexpr unsigned int SQ_S_SCRATCH_STORE_DWORDX2 = 0x00000016; - constexpr unsigned int SQ_S_SCRATCH_STORE_DWORDX4 = 0x00000017; - constexpr unsigned int SQ_S_SENDMSG = 0x00000010; - constexpr unsigned int SQ_S_SENDMSGHALT = 0x00000011; - constexpr unsigned int SQ_S_SETHALT = 0x0000000d; - constexpr unsigned int SQ_S_SETKILL = 0x0000000b; - constexpr unsigned int SQ_S_SETPC_B64 = 0x0000001d; - constexpr unsigned int SQ_S_SETPRIO = 0x0000000f; - constexpr unsigned int SQ_S_SETREG_B32 = 0x00000012; - constexpr unsigned int SQ_S_SETREG_IMM32_B32 = 0x00000014; - constexpr unsigned int SQ_S_SETVSKIP = 0x00000010; - constexpr unsigned int SQ_S_SET_GPR_IDX_IDX = 0x00000032; - constexpr unsigned int SQ_S_SET_GPR_IDX_MODE = 0x0000001d; - constexpr unsigned int SQ_S_SET_GPR_IDX_OFF = 0x0000001c; - constexpr unsigned int SQ_S_SET_GPR_IDX_ON = 0x00000011; - constexpr unsigned int SQ_S_SEXT_I32_I8 = 0x00000016; - constexpr unsigned int SQ_S_SEXT_I32_I16 = 0x00000017; - constexpr unsigned int SQ_S_SLEEP = 0x0000000e; - constexpr unsigned int SQ_S_STORE_DWORD = 0x00000010; - constexpr unsigned int SQ_S_STORE_DWORDX2 = 0x00000011; - constexpr unsigned int SQ_S_STORE_DWORDX4 = 0x00000012; - constexpr unsigned int SQ_S_SUBB_U32 = 0x00000005; - constexpr unsigned int SQ_S_SUB_I32 = 0x00000003; - constexpr unsigned int SQ_S_SUB_U32 = 0x00000001; - constexpr unsigned int SQ_S_SWAPPC_B64 = 0x0000001e; - constexpr unsigned int SQ_S_TRAP = 0x00000012; - constexpr unsigned int SQ_S_TTRACEDATA = 0x00000016; - constexpr unsigned int SQ_S_WAITCNT = 0x0000000c; - constexpr unsigned int SQ_S_WAKEUP = 0x00000003; - constexpr unsigned int SQ_S_WQM_B32 = 0x00000006; - constexpr unsigned int 
SQ_S_WQM_B64 = 0x00000007; - constexpr unsigned int SQ_S_XNOR_B32 = 0x0000001a; - constexpr unsigned int SQ_S_XNOR_B64 = 0x0000001b; - constexpr unsigned int SQ_S_XNOR_SAVEEXEC_B64 = 0x00000027; - constexpr unsigned int SQ_S_XOR_B32 = 0x00000010; - constexpr unsigned int SQ_S_XOR_B64 = 0x00000011; - constexpr unsigned int SQ_S_XOR_SAVEEXEC_B64 = 0x00000022; - constexpr unsigned int SQ_T = 0x00000007; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_D16_X = 0x00000008; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_D16_XY = 0x00000009; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_D16_XYZ = 0x0000000a; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_D16_XYZW = 0x0000000b; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_X = 0x00000000; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_XY = 0x00000001; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_XYZ = 0x00000002; - constexpr unsigned int SQ_TBUFFER_LOAD_FORMAT_XYZW = 0x00000003; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_D16_X = 0x0000000c; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_D16_XY = 0x0000000d; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_D16_XYZ = 0x0000000e; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_D16_XYZW = 0x0000000f; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_X = 0x00000004; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_XY = 0x00000005; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_XYZ = 0x00000006; - constexpr unsigned int SQ_TBUFFER_STORE_FORMAT_XYZW = 0x00000007; - constexpr unsigned int SQ_THREAD_TRACE_TIME_UNIT = 0x00000004; - constexpr unsigned int SQ_TRU = 0x0000000f; - constexpr unsigned int SQ_TTMP0 = 0x0000006c; - constexpr unsigned int SQ_TTMP1 = 0x0000006d; - constexpr unsigned int SQ_TTMP2 = 0x0000006e; - constexpr unsigned int SQ_TTMP3 = 0x0000006f; - constexpr unsigned int SQ_TTMP4 = 0x00000070; - constexpr unsigned int SQ_TTMP5 = 0x00000071; - constexpr unsigned int SQ_TTMP6 = 0x00000072; - constexpr unsigned int SQ_TTMP7 = 0x00000073; - constexpr 
unsigned int SQ_TTMP8 = 0x00000074; - constexpr unsigned int SQ_TTMP9 = 0x00000075; - constexpr unsigned int SQ_TTMP10 = 0x00000076; - constexpr unsigned int SQ_TTMP11 = 0x00000077; - constexpr unsigned int SQ_TTMP12 = 0x00000078; - constexpr unsigned int SQ_TTMP13 = 0x00000079; - constexpr unsigned int SQ_TTMP14 = 0x0000007a; - constexpr unsigned int SQ_TTMP15 = 0x0000007b; - constexpr unsigned int SQ_U = 0x00000008; - constexpr unsigned int SQ_VCC_ALL = 0x00000000; - constexpr unsigned int SQ_VCC_HI = 0x0000006b; - constexpr unsigned int SQ_VCC_LO = 0x0000006a; - constexpr unsigned int SQ_VGPR0 = 0x00000000; - constexpr unsigned int SQ_V_ADD3_U32 = 0x000001ff; - constexpr unsigned int SQ_V_ADDC_CO_U32 = 0x0000001c; - constexpr unsigned int SQ_V_ADD_CO_U32 = 0x00000019; - constexpr unsigned int SQ_V_ADD_F16 = 0x0000001f; - constexpr unsigned int SQ_V_ADD_F32 = 0x00000001; - constexpr unsigned int SQ_V_ADD_F64 = 0x00000280; - constexpr unsigned int SQ_V_ADD_I16 = 0x0000029e; - constexpr unsigned int SQ_V_ADD_I32 = 0x0000029c; - constexpr unsigned int SQ_V_ADD_LSHL_U32 = 0x000001fe; - constexpr unsigned int SQ_V_ADD_U16 = 0x00000026; - constexpr unsigned int SQ_V_ADD_U32 = 0x00000034; - constexpr unsigned int SQ_V_ALIGNBIT_B32 = 0x000001ce; - constexpr unsigned int SQ_V_ALIGNBYTE_B32 = 0x000001cf; - constexpr unsigned int SQ_V_AND_B32 = 0x00000013; - constexpr unsigned int SQ_V_AND_OR_B32 = 0x00000201; - constexpr unsigned int SQ_V_ASHRREV_I16 = 0x0000002c; - constexpr unsigned int SQ_V_ASHRREV_I32 = 0x00000011; - constexpr unsigned int SQ_V_ASHRREV_I64 = 0x00000291; - constexpr unsigned int SQ_V_BCNT_U32_B32 = 0x0000028b; - constexpr unsigned int SQ_V_BFE_I32 = 0x000001c9; - constexpr unsigned int SQ_V_BFE_U32 = 0x000001c8; - constexpr unsigned int SQ_V_BFI_B32 = 0x000001ca; - constexpr unsigned int SQ_V_BFM_B32 = 0x00000293; - constexpr unsigned int SQ_V_BFREV_B32 = 0x0000002c; - constexpr unsigned int SQ_V_CEIL_F16 = 0x00000045; - constexpr unsigned int 
SQ_V_CEIL_F32 = 0x0000001d; - constexpr unsigned int SQ_V_CEIL_F64 = 0x00000018; - constexpr unsigned int SQ_V_CLREXCP = 0x00000035; - constexpr unsigned int SQ_V_CMPX_CLASS_F16 = 0x00000015; - constexpr unsigned int SQ_V_CMPX_CLASS_F32 = 0x00000011; - constexpr unsigned int SQ_V_CMPX_CLASS_F64 = 0x00000013; - constexpr unsigned int SQ_V_CMPX_EQ_F16 = 0x00000032; - constexpr unsigned int SQ_V_CMPX_EQ_F32 = 0x00000052; - constexpr unsigned int SQ_V_CMPX_EQ_F64 = 0x00000072; - constexpr unsigned int SQ_V_CMPX_EQ_I16 = 0x000000b2; - constexpr unsigned int SQ_V_CMPX_EQ_I32 = 0x000000d2; - constexpr unsigned int SQ_V_CMPX_EQ_I64 = 0x000000f2; - constexpr unsigned int SQ_V_CMPX_EQ_U16 = 0x000000ba; - constexpr unsigned int SQ_V_CMPX_EQ_U32 = 0x000000da; - constexpr unsigned int SQ_V_CMPX_EQ_U64 = 0x000000fa; - constexpr unsigned int SQ_V_CMPX_F_F16 = 0x00000030; - constexpr unsigned int SQ_V_CMPX_F_F32 = 0x00000050; - constexpr unsigned int SQ_V_CMPX_F_F64 = 0x00000070; - constexpr unsigned int SQ_V_CMPX_F_I16 = 0x000000b0; - constexpr unsigned int SQ_V_CMPX_F_I32 = 0x000000d0; - constexpr unsigned int SQ_V_CMPX_F_I64 = 0x000000f0; - constexpr unsigned int SQ_V_CMPX_F_U16 = 0x000000b8; - constexpr unsigned int SQ_V_CMPX_F_U32 = 0x000000d8; - constexpr unsigned int SQ_V_CMPX_F_U64 = 0x000000f8; - constexpr unsigned int SQ_V_CMPX_GE_F16 = 0x00000036; - constexpr unsigned int SQ_V_CMPX_GE_F32 = 0x00000056; - constexpr unsigned int SQ_V_CMPX_GE_F64 = 0x00000076; - constexpr unsigned int SQ_V_CMPX_GE_I16 = 0x000000b6; - constexpr unsigned int SQ_V_CMPX_GE_I32 = 0x000000d6; - constexpr unsigned int SQ_V_CMPX_GE_I64 = 0x000000f6; - constexpr unsigned int SQ_V_CMPX_GE_U16 = 0x000000be; - constexpr unsigned int SQ_V_CMPX_GE_U32 = 0x000000de; - constexpr unsigned int SQ_V_CMPX_GE_U64 = 0x000000fe; - constexpr unsigned int SQ_V_CMPX_GT_F16 = 0x00000034; - constexpr unsigned int SQ_V_CMPX_GT_F32 = 0x00000054; - constexpr unsigned int SQ_V_CMPX_GT_F64 = 0x00000074; - constexpr 
unsigned int SQ_V_CMPX_GT_I16 = 0x000000b4; - constexpr unsigned int SQ_V_CMPX_GT_I32 = 0x000000d4; - constexpr unsigned int SQ_V_CMPX_GT_I64 = 0x000000f4; - constexpr unsigned int SQ_V_CMPX_GT_U16 = 0x000000bc; - constexpr unsigned int SQ_V_CMPX_GT_U32 = 0x000000dc; - constexpr unsigned int SQ_V_CMPX_GT_U64 = 0x000000fc; - constexpr unsigned int SQ_V_CMPX_LE_F16 = 0x00000033; - constexpr unsigned int SQ_V_CMPX_LE_F32 = 0x00000053; - constexpr unsigned int SQ_V_CMPX_LE_F64 = 0x00000073; - constexpr unsigned int SQ_V_CMPX_LE_I16 = 0x000000b3; - constexpr unsigned int SQ_V_CMPX_LE_I32 = 0x000000d3; - constexpr unsigned int SQ_V_CMPX_LE_I64 = 0x000000f3; - constexpr unsigned int SQ_V_CMPX_LE_U16 = 0x000000bb; - constexpr unsigned int SQ_V_CMPX_LE_U32 = 0x000000db; - constexpr unsigned int SQ_V_CMPX_LE_U64 = 0x000000fb; - constexpr unsigned int SQ_V_CMPX_LG_F16 = 0x00000035; - constexpr unsigned int SQ_V_CMPX_LG_F32 = 0x00000055; - constexpr unsigned int SQ_V_CMPX_LG_F64 = 0x00000075; - constexpr unsigned int SQ_V_CMPX_LT_F16 = 0x00000031; - constexpr unsigned int SQ_V_CMPX_LT_F32 = 0x00000051; - constexpr unsigned int SQ_V_CMPX_LT_F64 = 0x00000071; - constexpr unsigned int SQ_V_CMPX_LT_I16 = 0x000000b1; - constexpr unsigned int SQ_V_CMPX_LT_I32 = 0x000000d1; - constexpr unsigned int SQ_V_CMPX_LT_I64 = 0x000000f1; - constexpr unsigned int SQ_V_CMPX_LT_U16 = 0x000000b9; - constexpr unsigned int SQ_V_CMPX_LT_U32 = 0x000000d9; - constexpr unsigned int SQ_V_CMPX_LT_U64 = 0x000000f9; - constexpr unsigned int SQ_V_CMPX_NEQ_F16 = 0x0000003d; - constexpr unsigned int SQ_V_CMPX_NEQ_F32 = 0x0000005d; - constexpr unsigned int SQ_V_CMPX_NEQ_F64 = 0x0000007d; - constexpr unsigned int SQ_V_CMPX_NE_I16 = 0x000000b5; - constexpr unsigned int SQ_V_CMPX_NE_I32 = 0x000000d5; - constexpr unsigned int SQ_V_CMPX_NE_I64 = 0x000000f5; - constexpr unsigned int SQ_V_CMPX_NE_U16 = 0x000000bd; - constexpr unsigned int SQ_V_CMPX_NE_U32 = 0x000000dd; - constexpr unsigned int SQ_V_CMPX_NE_U64 = 
0x000000fd; - constexpr unsigned int SQ_V_CMPX_NGE_F16 = 0x00000039; - constexpr unsigned int SQ_V_CMPX_NGE_F32 = 0x00000059; - constexpr unsigned int SQ_V_CMPX_NGE_F64 = 0x00000079; - constexpr unsigned int SQ_V_CMPX_NGT_F16 = 0x0000003b; - constexpr unsigned int SQ_V_CMPX_NGT_F32 = 0x0000005b; - constexpr unsigned int SQ_V_CMPX_NGT_F64 = 0x0000007b; - constexpr unsigned int SQ_V_CMPX_NLE_F16 = 0x0000003c; - constexpr unsigned int SQ_V_CMPX_NLE_F32 = 0x0000005c; - constexpr unsigned int SQ_V_CMPX_NLE_F64 = 0x0000007c; - constexpr unsigned int SQ_V_CMPX_NLG_F16 = 0x0000003a; - constexpr unsigned int SQ_V_CMPX_NLG_F32 = 0x0000005a; - constexpr unsigned int SQ_V_CMPX_NLG_F64 = 0x0000007a; - constexpr unsigned int SQ_V_CMPX_NLT_F16 = 0x0000003e; - constexpr unsigned int SQ_V_CMPX_NLT_F32 = 0x0000005e; - constexpr unsigned int SQ_V_CMPX_NLT_F64 = 0x0000007e; - constexpr unsigned int SQ_V_CMPX_O_F16 = 0x00000037; - constexpr unsigned int SQ_V_CMPX_O_F32 = 0x00000057; - constexpr unsigned int SQ_V_CMPX_O_F64 = 0x00000077; - constexpr unsigned int SQ_V_CMPX_TRU_F16 = 0x0000003f; - constexpr unsigned int SQ_V_CMPX_TRU_F32 = 0x0000005f; - constexpr unsigned int SQ_V_CMPX_TRU_F64 = 0x0000007f; - constexpr unsigned int SQ_V_CMPX_T_I16 = 0x000000b7; - constexpr unsigned int SQ_V_CMPX_T_I32 = 0x000000d7; - constexpr unsigned int SQ_V_CMPX_T_I64 = 0x000000f7; - constexpr unsigned int SQ_V_CMPX_T_U16 = 0x000000bf; - constexpr unsigned int SQ_V_CMPX_T_U32 = 0x000000df; - constexpr unsigned int SQ_V_CMPX_T_U64 = 0x000000ff; - constexpr unsigned int SQ_V_CMPX_U_F16 = 0x00000038; - constexpr unsigned int SQ_V_CMPX_U_F32 = 0x00000058; - constexpr unsigned int SQ_V_CMPX_U_F64 = 0x00000078; - constexpr unsigned int SQ_V_CMP_CLASS_F16 = 0x00000014; - constexpr unsigned int SQ_V_CMP_CLASS_F32 = 0x00000010; - constexpr unsigned int SQ_V_CMP_CLASS_F64 = 0x00000012; - constexpr unsigned int SQ_V_CMP_EQ_F16 = 0x00000022; - constexpr unsigned int SQ_V_CMP_EQ_F32 = 0x00000042; - constexpr 
unsigned int SQ_V_CMP_EQ_F64 = 0x00000062; - constexpr unsigned int SQ_V_CMP_EQ_I16 = 0x000000a2; - constexpr unsigned int SQ_V_CMP_EQ_I32 = 0x000000c2; - constexpr unsigned int SQ_V_CMP_EQ_I64 = 0x000000e2; - constexpr unsigned int SQ_V_CMP_EQ_U16 = 0x000000aa; - constexpr unsigned int SQ_V_CMP_EQ_U32 = 0x000000ca; - constexpr unsigned int SQ_V_CMP_EQ_U64 = 0x000000ea; - constexpr unsigned int SQ_V_CMP_F_F16 = 0x00000020; - constexpr unsigned int SQ_V_CMP_F_F32 = 0x00000040; - constexpr unsigned int SQ_V_CMP_F_F64 = 0x00000060; - constexpr unsigned int SQ_V_CMP_F_I16 = 0x000000a0; - constexpr unsigned int SQ_V_CMP_F_I32 = 0x000000c0; - constexpr unsigned int SQ_V_CMP_F_I64 = 0x000000e0; - constexpr unsigned int SQ_V_CMP_F_U16 = 0x000000a8; - constexpr unsigned int SQ_V_CMP_F_U32 = 0x000000c8; - constexpr unsigned int SQ_V_CMP_F_U64 = 0x000000e8; - constexpr unsigned int SQ_V_CMP_GE_F16 = 0x00000026; - constexpr unsigned int SQ_V_CMP_GE_F32 = 0x00000046; - constexpr unsigned int SQ_V_CMP_GE_F64 = 0x00000066; - constexpr unsigned int SQ_V_CMP_GE_I16 = 0x000000a6; - constexpr unsigned int SQ_V_CMP_GE_I32 = 0x000000c6; - constexpr unsigned int SQ_V_CMP_GE_I64 = 0x000000e6; - constexpr unsigned int SQ_V_CMP_GE_U16 = 0x000000ae; - constexpr unsigned int SQ_V_CMP_GE_U32 = 0x000000ce; - constexpr unsigned int SQ_V_CMP_GE_U64 = 0x000000ee; - constexpr unsigned int SQ_V_CMP_GT_F16 = 0x00000024; - constexpr unsigned int SQ_V_CMP_GT_F32 = 0x00000044; - constexpr unsigned int SQ_V_CMP_GT_F64 = 0x00000064; - constexpr unsigned int SQ_V_CMP_GT_I16 = 0x000000a4; - constexpr unsigned int SQ_V_CMP_GT_I32 = 0x000000c4; - constexpr unsigned int SQ_V_CMP_GT_I64 = 0x000000e4; - constexpr unsigned int SQ_V_CMP_GT_U16 = 0x000000ac; - constexpr unsigned int SQ_V_CMP_GT_U32 = 0x000000cc; - constexpr unsigned int SQ_V_CMP_GT_U64 = 0x000000ec; - constexpr unsigned int SQ_V_CMP_LE_F16 = 0x00000023; - constexpr unsigned int SQ_V_CMP_LE_F32 = 0x00000043; - constexpr unsigned int SQ_V_CMP_LE_F64 
= 0x00000063; - constexpr unsigned int SQ_V_CMP_LE_I16 = 0x000000a3; - constexpr unsigned int SQ_V_CMP_LE_I32 = 0x000000c3; - constexpr unsigned int SQ_V_CMP_LE_I64 = 0x000000e3; - constexpr unsigned int SQ_V_CMP_LE_U16 = 0x000000ab; - constexpr unsigned int SQ_V_CMP_LE_U32 = 0x000000cb; - constexpr unsigned int SQ_V_CMP_LE_U64 = 0x000000eb; - constexpr unsigned int SQ_V_CMP_LG_F16 = 0x00000025; - constexpr unsigned int SQ_V_CMP_LG_F32 = 0x00000045; - constexpr unsigned int SQ_V_CMP_LG_F64 = 0x00000065; - constexpr unsigned int SQ_V_CMP_LT_F16 = 0x00000021; - constexpr unsigned int SQ_V_CMP_LT_F32 = 0x00000041; - constexpr unsigned int SQ_V_CMP_LT_F64 = 0x00000061; - constexpr unsigned int SQ_V_CMP_LT_I16 = 0x000000a1; - constexpr unsigned int SQ_V_CMP_LT_I32 = 0x000000c1; - constexpr unsigned int SQ_V_CMP_LT_I64 = 0x000000e1; - constexpr unsigned int SQ_V_CMP_LT_U16 = 0x000000a9; - constexpr unsigned int SQ_V_CMP_LT_U32 = 0x000000c9; - constexpr unsigned int SQ_V_CMP_LT_U64 = 0x000000e9; - constexpr unsigned int SQ_V_CMP_NEQ_F16 = 0x0000002d; - constexpr unsigned int SQ_V_CMP_NEQ_F32 = 0x0000004d; - constexpr unsigned int SQ_V_CMP_NEQ_F64 = 0x0000006d; - constexpr unsigned int SQ_V_CMP_NE_I16 = 0x000000a5; - constexpr unsigned int SQ_V_CMP_NE_I32 = 0x000000c5; - constexpr unsigned int SQ_V_CMP_NE_I64 = 0x000000e5; - constexpr unsigned int SQ_V_CMP_NE_U16 = 0x000000ad; - constexpr unsigned int SQ_V_CMP_NE_U32 = 0x000000cd; - constexpr unsigned int SQ_V_CMP_NE_U64 = 0x000000ed; - constexpr unsigned int SQ_V_CMP_NGE_F16 = 0x00000029; - constexpr unsigned int SQ_V_CMP_NGE_F32 = 0x00000049; - constexpr unsigned int SQ_V_CMP_NGE_F64 = 0x00000069; - constexpr unsigned int SQ_V_CMP_NGT_F16 = 0x0000002b; - constexpr unsigned int SQ_V_CMP_NGT_F32 = 0x0000004b; - constexpr unsigned int SQ_V_CMP_NGT_F64 = 0x0000006b; - constexpr unsigned int SQ_V_CMP_NLE_F16 = 0x0000002c; - constexpr unsigned int SQ_V_CMP_NLE_F32 = 0x0000004c; - constexpr unsigned int SQ_V_CMP_NLE_F64 = 
0x0000006c; - constexpr unsigned int SQ_V_CMP_NLG_F16 = 0x0000002a; - constexpr unsigned int SQ_V_CMP_NLG_F32 = 0x0000004a; - constexpr unsigned int SQ_V_CMP_NLG_F64 = 0x0000006a; - constexpr unsigned int SQ_V_CMP_NLT_F16 = 0x0000002e; - constexpr unsigned int SQ_V_CMP_NLT_F32 = 0x0000004e; - constexpr unsigned int SQ_V_CMP_NLT_F64 = 0x0000006e; - constexpr unsigned int SQ_V_CMP_O_F16 = 0x00000027; - constexpr unsigned int SQ_V_CMP_O_F32 = 0x00000047; - constexpr unsigned int SQ_V_CMP_O_F64 = 0x00000067; - constexpr unsigned int SQ_V_CMP_TRU_F16 = 0x0000002f; - constexpr unsigned int SQ_V_CMP_TRU_F32 = 0x0000004f; - constexpr unsigned int SQ_V_CMP_TRU_F64 = 0x0000006f; - constexpr unsigned int SQ_V_CMP_T_I16 = 0x000000a7; - constexpr unsigned int SQ_V_CMP_T_I32 = 0x000000c7; - constexpr unsigned int SQ_V_CMP_T_I64 = 0x000000e7; - constexpr unsigned int SQ_V_CMP_T_U16 = 0x000000af; - constexpr unsigned int SQ_V_CMP_T_U32 = 0x000000cf; - constexpr unsigned int SQ_V_CMP_T_U64 = 0x000000ef; - constexpr unsigned int SQ_V_CMP_U_F16 = 0x00000028; - constexpr unsigned int SQ_V_CMP_U_F32 = 0x00000048; - constexpr unsigned int SQ_V_CMP_U_F64 = 0x00000068; - constexpr unsigned int SQ_V_CNDMASK_B32 = 0x00000000; - constexpr unsigned int SQ_V_COS_F16 = 0x0000004a; - constexpr unsigned int SQ_V_COS_F32 = 0x0000002a; - constexpr unsigned int SQ_V_CUBEID_F32 = 0x000001c4; - constexpr unsigned int SQ_V_CUBEMA_F32 = 0x000001c7; - constexpr unsigned int SQ_V_CUBESC_F32 = 0x000001c5; - constexpr unsigned int SQ_V_CUBETC_F32 = 0x000001c6; - constexpr unsigned int SQ_V_CVT_F16_F32 = 0x0000000a; - constexpr unsigned int SQ_V_CVT_F16_I16 = 0x0000003a; - constexpr unsigned int SQ_V_CVT_F16_U16 = 0x00000039; - constexpr unsigned int SQ_V_CVT_F32_F16 = 0x0000000b; - constexpr unsigned int SQ_V_CVT_F32_F64 = 0x0000000f; - constexpr unsigned int SQ_V_CVT_F32_I32 = 0x00000005; - constexpr unsigned int SQ_V_CVT_F32_U32 = 0x00000006; - constexpr unsigned int SQ_V_CVT_F32_UBYTE0 = 0x00000011; - 
constexpr unsigned int SQ_V_CVT_F32_UBYTE1 = 0x00000012; - constexpr unsigned int SQ_V_CVT_F32_UBYTE2 = 0x00000013; - constexpr unsigned int SQ_V_CVT_F32_UBYTE3 = 0x00000014; - constexpr unsigned int SQ_V_CVT_F64_F32 = 0x00000010; - constexpr unsigned int SQ_V_CVT_F64_I32 = 0x00000004; - constexpr unsigned int SQ_V_CVT_F64_U32 = 0x00000016; - constexpr unsigned int SQ_V_CVT_FLR_I32_F32 = 0x0000000d; - constexpr unsigned int SQ_V_CVT_I16_F16 = 0x0000003c; - constexpr unsigned int SQ_V_CVT_I32_F32 = 0x00000008; - constexpr unsigned int SQ_V_CVT_I32_F64 = 0x00000003; - constexpr unsigned int SQ_V_CVT_NORM_I16_F16 = 0x0000004d; - constexpr unsigned int SQ_V_CVT_NORM_U16_F16 = 0x0000004e; - constexpr unsigned int SQ_V_CVT_OFF_F32_I4 = 0x0000000e; - constexpr unsigned int SQ_V_CVT_PKACCUM_U8_F32 = 0x000001f0; - constexpr unsigned int SQ_V_CVT_PKNORM_I16_F16 = 0x00000299; - constexpr unsigned int SQ_V_CVT_PKNORM_I16_F32 = 0x00000294; - constexpr unsigned int SQ_V_CVT_PKNORM_U16_F16 = 0x0000029a; - constexpr unsigned int SQ_V_CVT_PKNORM_U16_F32 = 0x00000295; - constexpr unsigned int SQ_V_CVT_PKRTZ_F16_F32 = 0x00000296; - constexpr unsigned int SQ_V_CVT_PK_I16_I32 = 0x00000298; - constexpr unsigned int SQ_V_CVT_PK_U8_F32 = 0x000001dd; - constexpr unsigned int SQ_V_CVT_PK_U16_U32 = 0x00000297; - constexpr unsigned int SQ_V_CVT_RPI_I32_F32 = 0x0000000c; - constexpr unsigned int SQ_V_CVT_U16_F16 = 0x0000003b; - constexpr unsigned int SQ_V_CVT_U32_F32 = 0x00000007; - constexpr unsigned int SQ_V_CVT_U32_F64 = 0x00000015; - constexpr unsigned int SQ_V_DIV_FIXUP_F16 = 0x00000207; - constexpr unsigned int SQ_V_DIV_FIXUP_F32 = 0x000001de; - constexpr unsigned int SQ_V_DIV_FIXUP_F64 = 0x000001df; - constexpr unsigned int SQ_V_DIV_FIXUP_LEGACY_F16 = 0x000001ef; - constexpr unsigned int SQ_V_DIV_FMAS_F32 = 0x000001e2; - constexpr unsigned int SQ_V_DIV_FMAS_F64 = 0x000001e3; - constexpr unsigned int SQ_V_DIV_SCALE_F32 = 0x000001e0; - constexpr unsigned int SQ_V_DIV_SCALE_F64 = 
0x000001e1; - constexpr unsigned int SQ_V_EXP_F16 = 0x00000041; - constexpr unsigned int SQ_V_EXP_F32 = 0x00000020; - constexpr unsigned int SQ_V_EXP_LEGACY_F32 = 0x0000004b; - constexpr unsigned int SQ_V_FFBH_I32 = 0x0000002f; - constexpr unsigned int SQ_V_FFBH_U32 = 0x0000002d; - constexpr unsigned int SQ_V_FFBL_B32 = 0x0000002e; - constexpr unsigned int SQ_V_FLOOR_F16 = 0x00000044; - constexpr unsigned int SQ_V_FLOOR_F32 = 0x0000001f; - constexpr unsigned int SQ_V_FLOOR_F64 = 0x0000001a; - constexpr unsigned int SQ_V_FMA_F16 = 0x00000206; - constexpr unsigned int SQ_V_FMA_F32 = 0x000001cb; - constexpr unsigned int SQ_V_FMA_F64 = 0x000001cc; - constexpr unsigned int SQ_V_FMA_LEGACY_F16 = 0x000001ee; - constexpr unsigned int SQ_V_FRACT_F16 = 0x00000048; - constexpr unsigned int SQ_V_FRACT_F32 = 0x0000001b; - constexpr unsigned int SQ_V_FRACT_F64 = 0x00000032; - constexpr unsigned int SQ_V_FREXP_EXP_I16_F16 = 0x00000043; - constexpr unsigned int SQ_V_FREXP_EXP_I32_F32 = 0x00000033; - constexpr unsigned int SQ_V_FREXP_EXP_I32_F64 = 0x00000030; - constexpr unsigned int SQ_V_FREXP_MANT_F16 = 0x00000042; - constexpr unsigned int SQ_V_FREXP_MANT_F32 = 0x00000034; - constexpr unsigned int SQ_V_FREXP_MANT_F64 = 0x00000031; - constexpr unsigned int SQ_V_INTERP_MOV_F32 = 0x00000002; - constexpr unsigned int SQ_V_INTERP_P1LL_F16 = 0x00000274; - constexpr unsigned int SQ_V_INTERP_P1LV_F16 = 0x00000275; - constexpr unsigned int SQ_V_INTERP_P1_F32 = 0x00000000; - constexpr unsigned int SQ_V_INTERP_P2_F16 = 0x00000277; - constexpr unsigned int SQ_V_INTERP_P2_F32 = 0x00000001; - constexpr unsigned int SQ_V_INTERP_P2_LEGACY_F16 = 0x00000276; - constexpr unsigned int SQ_V_INTRP_COUNT = 0x00000004; - constexpr unsigned int SQ_V_INTRP_OFFSET = 0x00000270; - constexpr unsigned int SQ_V_LDEXP_F16 = 0x00000033; - constexpr unsigned int SQ_V_LDEXP_F32 = 0x00000288; - constexpr unsigned int SQ_V_LDEXP_F64 = 0x00000284; - constexpr unsigned int SQ_V_LERP_U8 = 0x000001cd; - constexpr 
unsigned int SQ_V_LOG_F16 = 0x00000040; - constexpr unsigned int SQ_V_LOG_F32 = 0x00000021; - constexpr unsigned int SQ_V_LOG_LEGACY_F32 = 0x0000004c; - constexpr unsigned int SQ_V_LSHLREV_B16 = 0x0000002a; - constexpr unsigned int SQ_V_LSHLREV_B32 = 0x00000012; - constexpr unsigned int SQ_V_LSHLREV_B64 = 0x0000028f; - constexpr unsigned int SQ_V_LSHL_ADD_U32 = 0x000001fd; - constexpr unsigned int SQ_V_LSHL_OR_B32 = 0x00000200; - constexpr unsigned int SQ_V_LSHRREV_B16 = 0x0000002b; - constexpr unsigned int SQ_V_LSHRREV_B32 = 0x00000010; - constexpr unsigned int SQ_V_LSHRREV_B64 = 0x00000290; - constexpr unsigned int SQ_V_MAC_F16 = 0x00000023; - constexpr unsigned int SQ_V_MAC_F32 = 0x00000016; - constexpr unsigned int SQ_V_MAC_LEGACY_F32 = 0x0000028e; - constexpr unsigned int SQ_V_MADAK_F16 = 0x00000025; - constexpr unsigned int SQ_V_MADAK_F32 = 0x00000018; - constexpr unsigned int SQ_V_MADMK_F16 = 0x00000024; - constexpr unsigned int SQ_V_MADMK_F32 = 0x00000017; - constexpr unsigned int SQ_V_MAD_F16 = 0x00000203; - constexpr unsigned int SQ_V_MAD_F32 = 0x000001c1; - constexpr unsigned int SQ_V_MAD_I16 = 0x00000205; - constexpr unsigned int SQ_V_MAD_I32_I16 = 0x000001f2; - constexpr unsigned int SQ_V_MAD_I32_I24 = 0x000001c2; - constexpr unsigned int SQ_V_MAD_I64_I32 = 0x000001e9; - constexpr unsigned int SQ_V_MAD_LEGACY_F16 = 0x000001ea; - constexpr unsigned int SQ_V_MAD_LEGACY_F32 = 0x000001c0; - constexpr unsigned int SQ_V_MAD_LEGACY_I16 = 0x000001ec; - constexpr unsigned int SQ_V_MAD_LEGACY_U16 = 0x000001eb; - constexpr unsigned int SQ_V_MAD_MIXHI_F16 = 0x00000022; - constexpr unsigned int SQ_V_MAD_MIXLO_F16 = 0x00000021; - constexpr unsigned int SQ_V_MAD_MIX_F32 = 0x00000020; - constexpr unsigned int SQ_V_MAD_U16 = 0x00000204; - constexpr unsigned int SQ_V_MAD_U32_U16 = 0x000001f1; - constexpr unsigned int SQ_V_MAD_U32_U24 = 0x000001c3; - constexpr unsigned int SQ_V_MAD_U64_U32 = 0x000001e8; - constexpr unsigned int SQ_V_MAX3_F16 = 0x000001f7; - constexpr 
unsigned int SQ_V_MAX3_F32 = 0x000001d3; - constexpr unsigned int SQ_V_MAX3_I16 = 0x000001f8; - constexpr unsigned int SQ_V_MAX3_I32 = 0x000001d4; - constexpr unsigned int SQ_V_MAX3_U16 = 0x000001f9; - constexpr unsigned int SQ_V_MAX3_U32 = 0x000001d5; - constexpr unsigned int SQ_V_MAX_F16 = 0x0000002d; - constexpr unsigned int SQ_V_MAX_F32 = 0x0000000b; - constexpr unsigned int SQ_V_MAX_F64 = 0x00000283; - constexpr unsigned int SQ_V_MAX_I16 = 0x00000030; - constexpr unsigned int SQ_V_MAX_I32 = 0x0000000d; - constexpr unsigned int SQ_V_MAX_U16 = 0x0000002f; - constexpr unsigned int SQ_V_MAX_U32 = 0x0000000f; - constexpr unsigned int SQ_V_MBCNT_HI_U32_B32 = 0x0000028d; - constexpr unsigned int SQ_V_MBCNT_LO_U32_B32 = 0x0000028c; - constexpr unsigned int SQ_V_MED3_F16 = 0x000001fa; - constexpr unsigned int SQ_V_MED3_F32 = 0x000001d6; - constexpr unsigned int SQ_V_MED3_I16 = 0x000001fb; - constexpr unsigned int SQ_V_MED3_I32 = 0x000001d7; - constexpr unsigned int SQ_V_MED3_U16 = 0x000001fc; - constexpr unsigned int SQ_V_MED3_U32 = 0x000001d8; - constexpr unsigned int SQ_V_MIN3_F16 = 0x000001f4; - constexpr unsigned int SQ_V_MIN3_F32 = 0x000001d0; - constexpr unsigned int SQ_V_MIN3_I16 = 0x000001f5; - constexpr unsigned int SQ_V_MIN3_I32 = 0x000001d1; - constexpr unsigned int SQ_V_MIN3_U16 = 0x000001f6; - constexpr unsigned int SQ_V_MIN3_U32 = 0x000001d2; - constexpr unsigned int SQ_V_MIN_F16 = 0x0000002e; - constexpr unsigned int SQ_V_MIN_F32 = 0x0000000a; - constexpr unsigned int SQ_V_MIN_F64 = 0x00000282; - constexpr unsigned int SQ_V_MIN_I16 = 0x00000032; - constexpr unsigned int SQ_V_MIN_I32 = 0x0000000c; - constexpr unsigned int SQ_V_MIN_U16 = 0x00000031; - constexpr unsigned int SQ_V_MIN_U32 = 0x0000000e; - constexpr unsigned int SQ_V_MOV_B32 = 0x00000001; - constexpr unsigned int SQ_V_MOV_FED_B32 = 0x00000009; - constexpr unsigned int SQ_V_MOV_PRSV_B32 = 0x00000036; - constexpr unsigned int SQ_V_MQSAD_PK_U16_U8 = 0x000001e6; - constexpr unsigned int 
SQ_V_MQSAD_U32_U8 = 0x000001e7; - constexpr unsigned int SQ_V_MSAD_U8 = 0x000001e4; - constexpr unsigned int SQ_V_MUL_F16 = 0x00000022; - constexpr unsigned int SQ_V_MUL_F32 = 0x00000005; - constexpr unsigned int SQ_V_MUL_F64 = 0x00000281; - constexpr unsigned int SQ_V_MUL_HI_I32 = 0x00000287; - constexpr unsigned int SQ_V_MUL_HI_I32_I24 = 0x00000007; - constexpr unsigned int SQ_V_MUL_HI_U32 = 0x00000286; - constexpr unsigned int SQ_V_MUL_HI_U32_U24 = 0x00000009; - constexpr unsigned int SQ_V_MUL_I32_I24 = 0x00000006; - constexpr unsigned int SQ_V_MUL_LEGACY_F32 = 0x00000004; - constexpr unsigned int SQ_V_MUL_LO_U16 = 0x00000029; - constexpr unsigned int SQ_V_MUL_LO_U32 = 0x00000285; - constexpr unsigned int SQ_V_MUL_U32_U24 = 0x00000008; - constexpr unsigned int SQ_V_NOP = 0x00000000; - constexpr unsigned int SQ_V_NOT_B32 = 0x0000002b; - constexpr unsigned int SQ_V_OP1_COUNT = 0x00000080; - constexpr unsigned int SQ_V_OP1_OFFSET = 0x00000140; - constexpr unsigned int SQ_V_OP2_COUNT = 0x00000040; - constexpr unsigned int SQ_V_OP2_OFFSET = 0x00000100; - constexpr unsigned int SQ_V_OP3P_COUNT = 0x00000080; - constexpr unsigned int SQ_V_OP3P_OFFSET = 0x00000380; - constexpr unsigned int SQ_V_OP3_2IN_COUNT = 0x00000080; - constexpr unsigned int SQ_V_OP3_2IN_OFFSET = 0x00000280; - constexpr unsigned int SQ_V_OP3_3IN_COUNT = 0x000000b0; - constexpr unsigned int SQ_V_OP3_3IN_OFFSET = 0x000001c0; - constexpr unsigned int SQ_V_OP3_INTRP_COUNT = 0x0000000c; - constexpr unsigned int SQ_V_OP3_INTRP_OFFSET = 0x00000274; - constexpr unsigned int SQ_V_OPC_COUNT = 0x00000100; - constexpr unsigned int SQ_V_OPC_OFFSET = 0x00000000; - constexpr unsigned int SQ_V_OR3_B32 = 0x00000202; - constexpr unsigned int SQ_V_OR_B32 = 0x00000014; - constexpr unsigned int SQ_V_PACK_B32_F16 = 0x000002a0; - constexpr unsigned int SQ_V_PERM_B32 = 0x000001ed; - constexpr unsigned int SQ_V_PK_ADD_F16 = 0x0000000f; - constexpr unsigned int SQ_V_PK_ADD_I16 = 0x00000002; - constexpr unsigned int 
SQ_V_PK_ADD_U16 = 0x0000000a; - constexpr unsigned int SQ_V_PK_ASHRREV_I16 = 0x00000006; - constexpr unsigned int SQ_V_PK_FMA_F16 = 0x0000000e; - constexpr unsigned int SQ_V_PK_LSHLREV_B16 = 0x00000004; - constexpr unsigned int SQ_V_PK_LSHRREV_B16 = 0x00000005; - constexpr unsigned int SQ_V_PK_MAD_I16 = 0x00000000; - constexpr unsigned int SQ_V_PK_MAD_U16 = 0x00000009; - constexpr unsigned int SQ_V_PK_MAX_F16 = 0x00000012; - constexpr unsigned int SQ_V_PK_MAX_I16 = 0x00000007; - constexpr unsigned int SQ_V_PK_MAX_U16 = 0x0000000c; - constexpr unsigned int SQ_V_PK_MIN_F16 = 0x00000011; - constexpr unsigned int SQ_V_PK_MIN_I16 = 0x00000008; - constexpr unsigned int SQ_V_PK_MIN_U16 = 0x0000000d; - constexpr unsigned int SQ_V_PK_MUL_F16 = 0x00000010; - constexpr unsigned int SQ_V_PK_MUL_LO_U16 = 0x00000001; - constexpr unsigned int SQ_V_PK_SUB_I16 = 0x00000003; - constexpr unsigned int SQ_V_PK_SUB_U16 = 0x0000000b; - constexpr unsigned int SQ_V_QSAD_PK_U16_U8 = 0x000001e5; - constexpr unsigned int SQ_V_RCP_F16 = 0x0000003d; - constexpr unsigned int SQ_V_RCP_F32 = 0x00000022; - constexpr unsigned int SQ_V_RCP_F64 = 0x00000025; - constexpr unsigned int SQ_V_RCP_IFLAG_F32 = 0x00000023; - constexpr unsigned int SQ_V_READFIRSTLANE_B32 = 0x00000002; - constexpr unsigned int SQ_V_READLANE_B32 = 0x00000289; - constexpr unsigned int SQ_V_READLANE_REGRD_B32 = 0x0000029b; - constexpr unsigned int SQ_V_RNDNE_F16 = 0x00000047; - constexpr unsigned int SQ_V_RNDNE_F32 = 0x0000001e; - constexpr unsigned int SQ_V_RNDNE_F64 = 0x00000019; - constexpr unsigned int SQ_V_RSQ_F16 = 0x0000003f; - constexpr unsigned int SQ_V_RSQ_F32 = 0x00000024; - constexpr unsigned int SQ_V_RSQ_F64 = 0x00000026; - constexpr unsigned int SQ_V_SAD_HI_U8 = 0x000001da; - constexpr unsigned int SQ_V_SAD_U8 = 0x000001d9; - constexpr unsigned int SQ_V_SAD_U16 = 0x000001db; - constexpr unsigned int SQ_V_SAD_U32 = 0x000001dc; - constexpr unsigned int SQ_V_SAT_PK_U8_I16 = 0x0000004f; - constexpr unsigned int 
SQ_V_SCREEN_PARTITION_4SE_B32 = 0x00000037; - constexpr unsigned int SQ_V_SIN_F16 = 0x00000049; - constexpr unsigned int SQ_V_SIN_F32 = 0x00000029; - constexpr unsigned int SQ_V_SQRT_F16 = 0x0000003e; - constexpr unsigned int SQ_V_SQRT_F32 = 0x00000027; - constexpr unsigned int SQ_V_SQRT_F64 = 0x00000028; - constexpr unsigned int SQ_V_SUBBREV_CO_U32 = 0x0000001e; - constexpr unsigned int SQ_V_SUBB_CO_U32 = 0x0000001d; - constexpr unsigned int SQ_V_SUBREV_CO_U32 = 0x0000001b; - constexpr unsigned int SQ_V_SUBREV_F16 = 0x00000021; - constexpr unsigned int SQ_V_SUBREV_F32 = 0x00000003; - constexpr unsigned int SQ_V_SUBREV_U16 = 0x00000028; - constexpr unsigned int SQ_V_SUBREV_U32 = 0x00000036; - constexpr unsigned int SQ_V_SUB_CO_U32 = 0x0000001a; - constexpr unsigned int SQ_V_SUB_F16 = 0x00000020; - constexpr unsigned int SQ_V_SUB_F32 = 0x00000002; - constexpr unsigned int SQ_V_SUB_I16 = 0x0000029f; - constexpr unsigned int SQ_V_SUB_I32 = 0x0000029d; - constexpr unsigned int SQ_V_SUB_U16 = 0x00000027; - constexpr unsigned int SQ_V_SUB_U32 = 0x00000035; - constexpr unsigned int SQ_V_SWAP_B32 = 0x00000051; - constexpr unsigned int SQ_V_TRIG_PREOP_F64 = 0x00000292; - constexpr unsigned int SQ_V_TRUNC_F16 = 0x00000046; - constexpr unsigned int SQ_V_TRUNC_F32 = 0x0000001c; - constexpr unsigned int SQ_V_TRUNC_F64 = 0x00000017; - constexpr unsigned int SQ_V_WRITELANE_B32 = 0x0000028a; - constexpr unsigned int SQ_V_WRITELANE_REGWR_B32 = 0x00000050; - constexpr unsigned int SQ_V_XAD_U32 = 0x000001f3; - constexpr unsigned int SQ_V_XOR_B32 = 0x00000015; - constexpr unsigned int SQ_WAITCNT_EXP_SHIFT = 0x00000004; - constexpr unsigned int SQ_WAITCNT_EXP_SIZE = 0x00000003; - constexpr unsigned int SQ_WAITCNT_LGKM_SHIFT = 0x00000008; - constexpr unsigned int SQ_WAITCNT_LGKM_SIZE = 0x00000004; - constexpr unsigned int SQ_WAITCNT_VM_HI_SHIFT = 0x0000000e; - constexpr unsigned int SQ_WAITCNT_VM_HI_SIZE = 0x00000002; - constexpr unsigned int SQ_WAITCNT_VM_SHIFT = 0x00000000; - 
constexpr unsigned int SQ_WAITCNT_VM_SIZE = 0x00000004; - constexpr unsigned int SQ_XLATE_VOP3_TO_VINTRP_COUNT = 0x00000004; - constexpr unsigned int SQ_XLATE_VOP3_TO_VINTRP_OFFSET = 0x00000270; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP1_COUNT = 0x00000080; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP1_OFFSET = 0x00000140; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP2_COUNT = 0x00000040; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP2_OFFSET = 0x00000100; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP3P_COUNT = 0x00000080; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOP3P_OFFSET = 0x00000380; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOPC_COUNT = 0x00000100; - constexpr unsigned int SQ_XLATE_VOP3_TO_VOPC_OFFSET = 0x00000000; - constexpr unsigned int SQ_XNACK_MASK_HI = 0x00000069; - constexpr unsigned int SQ_XNACK_MASK_LO = 0x00000068; -} // namespace Gfx09 - -namespace Gfx09_10 -{ - constexpr unsigned int CONTEXT_SPACE_END = 0x0000bfff; -} // namespace Gfx09_10 - -namespace Gfx10 -{ - constexpr unsigned int SQ_WAVE_IB_DEP_VA_VDST_SIZE = 0x00000004; -} // namespace Gfx10 - -namespace Gfx101 -{ - constexpr unsigned int GB_TILE_MODE0_GEN0 = 0x90800310; - constexpr unsigned int GB_TILE_MODE0_GEN1 = 0x90800310; - constexpr unsigned int GB_TILE_MODE1_GEN0 = 0x90800b10; - constexpr unsigned int GB_TILE_MODE1_GEN1 = 0x90800b10; - constexpr unsigned int GB_TILE_MODE2_GEN0 = 0x90801310; - constexpr unsigned int GB_TILE_MODE2_GEN1 = 0x90801310; - constexpr unsigned int GB_TILE_MODE3_GEN0 = 0x90801b10; - constexpr unsigned int GB_TILE_MODE3_GEN1 = 0x90801b10; - constexpr unsigned int GB_TILE_MODE4_GEN0 = 0x90802310; - constexpr unsigned int GB_TILE_MODE4_GEN1 = 0x90802310; - constexpr unsigned int GB_TILE_MODE5_GEN0 = 0x90800308; - constexpr unsigned int GB_TILE_MODE5_GEN1 = 0x90800308; - constexpr unsigned int GB_TILE_MODE6_GEN0 = 0x90801318; - constexpr unsigned int GB_TILE_MODE6_GEN1 = 0x90801318; - constexpr unsigned int GB_TILE_MODE7_GEN0 = 0x90802318; - constexpr 
unsigned int GB_TILE_MODE7_GEN1 = 0x90802318; - constexpr unsigned int GB_TILE_MODE8_GEN0 = 0x90000304; - constexpr unsigned int GB_TILE_MODE8_GEN1 = 0x90000304; - constexpr unsigned int GB_TILE_MODE9_GEN0 = 0x90000308; - constexpr unsigned int GB_TILE_MODE9_GEN1 = 0x90000308; - constexpr unsigned int GB_TILE_MODE10_GEN0 = 0x92000310; - constexpr unsigned int GB_TILE_MODE10_GEN1 = 0x92000310; - constexpr unsigned int GB_TILE_MODE11_GEN0 = 0x92000294; - constexpr unsigned int GB_TILE_MODE11_GEN1 = 0x92000294; - constexpr unsigned int GB_TILE_MODE12_GEN0 = 0x92000318; - constexpr unsigned int GB_TILE_MODE12_GEN1 = 0x92000318; - constexpr unsigned int GB_TILE_MODE13_GEN0 = 0x90400308; - constexpr unsigned int GB_TILE_MODE13_GEN1 = 0x90400308; - constexpr unsigned int GB_TILE_MODE14_GEN0 = 0x92400310; - constexpr unsigned int GB_TILE_MODE14_GEN1 = 0x92400310; - constexpr unsigned int GB_TILE_MODE15_GEN0 = 0x924002b0; - constexpr unsigned int GB_TILE_MODE15_GEN1 = 0x924002b0; - constexpr unsigned int GB_TILE_MODE16_GEN0 = 0x92400294; - constexpr unsigned int GB_TILE_MODE16_GEN1 = 0x92400294; - constexpr unsigned int GB_TILE_MODE17_GEN0 = 0x92400318; - constexpr unsigned int GB_TILE_MODE17_GEN1 = 0x92400318; - constexpr unsigned int GB_TILE_MODE18_GEN0 = 0x9240032c; - constexpr unsigned int GB_TILE_MODE18_GEN1 = 0x9240032c; - constexpr unsigned int GB_TILE_MODE19_GEN0 = 0x9100030c; - constexpr unsigned int GB_TILE_MODE19_GEN1 = 0x9100030c; - constexpr unsigned int GB_TILE_MODE20_GEN0 = 0x9100031c; - constexpr unsigned int GB_TILE_MODE20_GEN1 = 0x9100031c; - constexpr unsigned int GB_TILE_MODE21_GEN0 = 0x910002b4; - constexpr unsigned int GB_TILE_MODE21_GEN1 = 0x910002b4; - constexpr unsigned int GB_TILE_MODE22_GEN0 = 0x910002a4; - constexpr unsigned int GB_TILE_MODE22_GEN1 = 0x910002a4; - constexpr unsigned int GB_TILE_MODE23_GEN0 = 0x91000328; - constexpr unsigned int GB_TILE_MODE23_GEN1 = 0x91000328; - constexpr unsigned int GB_TILE_MODE24_GEN0 = 0x910002bc; - 
constexpr unsigned int GB_TILE_MODE24_GEN1 = 0x910002bc; - constexpr unsigned int GB_TILE_MODE25_GEN0 = 0x91000320; - constexpr unsigned int GB_TILE_MODE25_GEN1 = 0x91000320; - constexpr unsigned int GB_TILE_MODE26_GEN0 = 0x910002b8; - constexpr unsigned int GB_TILE_MODE26_GEN1 = 0x910002b8; - constexpr unsigned int GB_TILE_MODE27_GEN0 = 0x90c00308; - constexpr unsigned int GB_TILE_MODE27_GEN1 = 0x90c00308; - constexpr unsigned int GB_TILE_MODE28_GEN0 = 0x92c00310; - constexpr unsigned int GB_TILE_MODE28_GEN1 = 0x92c00310; - constexpr unsigned int GB_TILE_MODE29_GEN0 = 0x92c00294; - constexpr unsigned int GB_TILE_MODE29_GEN1 = 0x92c00294; - constexpr unsigned int GB_TILE_MODE30_GEN0 = 0x92c00318; - constexpr unsigned int GB_TILE_MODE30_GEN1 = 0x92c00318; - constexpr unsigned int GB_TILE_MODE31_GEN0 = 0x00000000; - constexpr unsigned int GB_TILE_MODE31_GEN1 = 0x00000000; - constexpr unsigned int INST_ID_XNACK_OVERRIDE = 0xfffffff5; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Gfx101 - -namespace Gfx103PlusExclusive -{ - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_HOLD_CNT_SIZE = 0x00000001; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_HOLD_CNT_START = 0x00000007; - constexpr unsigned int SQ_WAVE_IB_DEP_HOLD_CNT_SIZE = 0x00000001; -} // namespace Gfx103PlusExclusive - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -namespace Gfx104Plus -{ - constexpr unsigned int SIMM16_WAITCNT_EXP_CNT_START = 0x00000000; - constexpr unsigned int SIMM16_WAITCNT_LGKM_CNT_START = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_SIZE = 0x00000006; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_START = 0x0000000a; -} // namespace Gfx104Plus -#endif - -namespace Gfx10Core -{ - constexpr unsigned int SIMM16_WAITCNT_EXP_CNT_START = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_LGKM_CNT_START = 0x00000008; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_HI_SIZE 
= 0x00000002; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_HI_START = 0x0000000e; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_SIZE = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_VM_CNT_START = 0x00000000; -} // namespace Gfx10Core - -namespace Gfx10CorePlus -{ - constexpr unsigned int SEM_RESP_FAILED = 0x00000002; - constexpr unsigned int SEM_RESP_PASSED = 0x00000003; -} // namespace Gfx10CorePlus - -namespace Gfx10Plus -{ - constexpr unsigned int CSCNTL_ADDR_WIDTH = 0x00000007; - constexpr unsigned int CSCNTL_DATA_WIDTH = 0x00000020; - constexpr unsigned int CSCNTL_TYPE_WIDTH = 0x00000002; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_SA_SDST_SIZE = 0x00000001; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_SA_SDST_START = 0x00000000; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_SDST_SIZE = 0x00000003; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_SDST_START = 0x00000009; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_SSRC_SIZE = 0x00000001; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_SSRC_START = 0x00000008; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_VCC_SIZE = 0x00000001; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_VCC_START = 0x00000001; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_VDST_SIZE = 0x00000004; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VA_VDST_START = 0x0000000c; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VM_VSRC_SIZE = 0x00000003; - constexpr unsigned int SIMM16_WAITCNT_DEPCTR_VM_VSRC_START = 0x00000002; - constexpr unsigned int SIMM16_WAITCNT_LGKM_CNT_SIZE = 0x00000006; - constexpr unsigned int SQIND_WAVE_HWREGS_OFFSET = 0x00000100; - constexpr unsigned int SQIND_WAVE_HWREGS_SIZE = 0x00000100; - constexpr unsigned int SQIND_WAVE_VGPRS_SIZE = 0x00000400; - constexpr unsigned int SQ_WAVE_IB_DEP_LDS_DIR_SIZE = 0x00000003; - constexpr unsigned int SQ_WAVE_IB_DEP_SA_EXEC_SIZE = 0x00000002; - constexpr unsigned int SQ_WAVE_IB_DEP_SA_M0_SIZE = 0x00000001; - constexpr unsigned int 
SQ_WAVE_IB_DEP_SA_SDST_SIZE = 0x00000004; - constexpr unsigned int SQ_WAVE_IB_DEP_VA_EXEC_SIZE = 0x00000002; - constexpr unsigned int SQ_WAVE_IB_DEP_VA_SDST_SIZE = 0x00000004; - constexpr unsigned int SQ_WAVE_IB_DEP_VA_SSRC_SIZE = 0x00000003; - constexpr unsigned int SQ_WAVE_IB_DEP_VA_VCC_SIZE = 0x00000003; - constexpr unsigned int SQ_WAVE_IB_DEP_VM_VSRC_SIZE = 0x00000004; -} // namespace Gfx10Plus - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -namespace Gfx11 -{ - constexpr unsigned int CONTEXT_SPACE_END = 0x0000a3ff; - constexpr unsigned int INST_ID_HW_TRAP_GET_TBA = 0xfffffff5; - constexpr unsigned int PFVF_SQDEC_BEGIN = 0x0000a9e0; - constexpr unsigned int PFVF_SQDEC_END = 0x0000a9ff; - constexpr unsigned int SIMM16_WAIT_EVENT_EXP_RDY_SIZE = 0x00000001; - constexpr unsigned int SIMM16_WAIT_EVENT_EXP_RDY_START = 0x00000000; - constexpr unsigned int SQ_WAVE_IB_DEP_VA_VDST_SIZE = 0x00000005; -} // namespace Gfx11 -#endif - -#if CHIP_HDR_NAVI21 -namespace Nv21 -{ - constexpr unsigned int IP_I2C_M_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Nv21 -#endif - -#if CHIP_HDR_NAVI22 -namespace Nv22 -{ - constexpr unsigned int IP_I2C_M_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Nv22 -#endif - -#if CHIP_HDR_NAVI23 -namespace Nv23 -{ - constexpr unsigned int IP_I2C_M_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Nv23 -#endif - -#if CHIP_HDR_NAVI24 -namespace Nv24 -{ - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Nv24 -#endif - -#if CHIP_HDR_NAVI31 -namespace Nv31 -{ - constexpr unsigned int IP_I2C_M_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - 
constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; -} // namespace Nv31 -#endif - -namespace Raphael -{ - constexpr unsigned int AL_REVISION_ID = 0x00000012; - constexpr unsigned int EMMC_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int EMMC_HC_REG_REVISION_ID = 0x00000000; - constexpr unsigned int ENHIOMEMAPERTURE_REVISION_ID = 0x00000000; - constexpr unsigned int ILA_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_DEVICE_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_HCLK_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_P2P_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_ESPI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_SDP_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int MMREG_REVISION_ID = 0x00000000; - constexpr unsigned int NUM_REQUESTORS = 0x00000005; - constexpr unsigned int REQID_ECHI2 = 0x00000004; - constexpr unsigned int REQID_EHCI = 0x00000001; - constexpr unsigned int REQID_OCHI2 = 0x00000003; - constexpr unsigned int REQID_OHCI = 0x00000000; - constexpr unsigned int REQID_SATA0 = 0x00000002; - constexpr unsigned int SDB_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SD_PCI_CFG_REVISION_ID = 0x00000000; -} // namespace Raphael - -namespace Rembrandt -{ - constexpr unsigned int AL_REVISION_ID = 0x00000012; - constexpr unsigned int EMMC_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int EMMC_HC_REG_REVISION_ID = 0x00000000; - constexpr unsigned int ENHIOMEMAPERTURE_REVISION_ID = 0x00000000; - constexpr unsigned int ILA_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_DEVICE_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_HCLK_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_P2P_CFG_REVISION_ID = 
0x00000000; - constexpr unsigned int IP_ESPI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_SDP_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int MMREG_REVISION_ID = 0x00000000; - constexpr unsigned int NUM_REQUESTORS = 0x00000005; - constexpr unsigned int REQID_ECHI2 = 0x00000004; - constexpr unsigned int REQID_EHCI = 0x00000001; - constexpr unsigned int REQID_OCHI2 = 0x00000003; - constexpr unsigned int REQID_OHCI = 0x00000000; - constexpr unsigned int REQID_SATA0 = 0x00000002; - constexpr unsigned int SATA_AHCI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SATA_PCI_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int SATA_REVISION_ID = 0x00000000; - constexpr unsigned int sata_sgpio_reg_REVISION_ID = 0x00000000; - constexpr unsigned int SDB_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SD_PCI_CFG_REVISION_ID = 0x00000000; -} // namespace Rembrandt - -namespace Rn -{ - constexpr unsigned int AL_REVISION_ID = 0x00000012; - constexpr unsigned int EMMC_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int EMMC_HC_REG_REVISION_ID = 0x00000000; - constexpr unsigned int ENHIOMEMAPERTURE_REVISION_ID = 0x00000000; - constexpr unsigned int ILA_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_DEVICE_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_HCLK_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_HARD_ADDR_REVISION_ID = 0x00000000; - constexpr unsigned int IP_AL2AHB_P2P_CFG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_ESPI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_SDP_REG_REVISION_ID = 0x00000000; - constexpr unsigned int IP_USB_PD_REVISION_ID = 0x00000000; - constexpr unsigned int MMREG_REVISION_ID = 0x00000000; - constexpr unsigned int NUM_REQUESTORS = 0x00000005; - constexpr unsigned int REQID_ECHI2 = 0x00000004; - constexpr unsigned int REQID_EHCI = 0x00000001; - constexpr unsigned int 
REQID_OCHI2 = 0x00000003; - constexpr unsigned int REQID_OHCI = 0x00000000; - constexpr unsigned int REQID_SATA0 = 0x00000002; - constexpr unsigned int SATA_AHCI_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SATA_REVISION_ID = 0x00000000; - constexpr unsigned int sata_sgpio_reg_REVISION_ID = 0x00000000; - constexpr unsigned int SDB_REG_REVISION_ID = 0x00000000; - constexpr unsigned int SD_PCI_CFG_REVISION_ID = 0x00000000; -} // namespace Rn - -namespace Rv1x_Rv2x -{ - constexpr unsigned int SEM_PASSED = 0x00000003; - constexpr unsigned int _SEM_FAILED = 0x00000002; -} // namespace Rv1x_Rv2x - -namespace Vg10_Vg12_Vg20_Rn -{ - constexpr unsigned int SEM_RESP_FAILED = 0x00000002; - constexpr unsigned int SEM_RESP_PASSED = 0x00000003; -} // namespace Vg10_Vg12_Vg20_Rn - -namespace Vg10_Vg12_Vg20_Rv1x_Rv2x -{ - constexpr unsigned int ROM_SIGNATURE = 0x0000aa55; - constexpr unsigned int SRCID_SECURE_E = 0x0000000e; -} // namespace Vg10_Vg12_Vg20_Rv1x_Rv2x - -namespace Vg12 -{ - constexpr unsigned int SQ_V_DOT2C_F32_F16 = 0x00000037; - constexpr unsigned int SQ_V_DOT2C_I32_I16 = 0x00000038; - constexpr unsigned int SQ_V_DOT2_I32_I16_I8 = 0x00000024; - constexpr unsigned int SQ_V_DOT2_U32_U16_U8 = 0x00000025; - constexpr unsigned int SQ_V_DOT4C_I32_I8 = 0x00000039; - constexpr unsigned int SQ_V_DOT8C_I32_I4 = 0x0000003a; - constexpr unsigned int SQ_V_PK_FMAC_F16 = 0x0000003c; -} // namespace Vg12 - -namespace Vg12_Vg20_Rn -{ - constexpr unsigned int SQ_V_DOT2_F32_F16 = 0x00000023; - constexpr unsigned int SQ_V_DOT2_I32_I16 = 0x00000026; - constexpr unsigned int SQ_V_DOT2_U32_U16 = 0x00000027; - constexpr unsigned int SQ_V_DOT4_I32_I8 = 0x00000028; - constexpr unsigned int SQ_V_DOT4_U32_U8 = 0x00000029; - constexpr unsigned int SQ_V_DOT8_I32_I4 = 0x0000002a; - constexpr unsigned int SQ_V_DOT8_U32_U4 = 0x0000002b; - constexpr unsigned int SQ_V_FMAC_F32 = 0x0000003b; - constexpr unsigned int SQ_V_XNOR_B32 = 0x0000003d; -} // namespace Vg12_Vg20_Rn - -enum PerfCtrId 
-{ - CBPerfSelId = 0, - ChaPerfSelId = 1, - ChcPerfSelId = 2, - ChcgPerfSelId = 3, - CpcPerfcountSelId = 4, - CpfPerfcountSelId = 5, - CpgPerfcountSelId = 6, -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI24 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - DfMallPerfSelId = 7, -#endif - GCRPerfSelId = 8, - GdsPerfcountSelectId = 9, - Ge1PerfcountSelectId = 10, - Ge2DistPerfcountSelectId = 11, - Ge2SePerfcountSelectId = 12, - GePerfcountSelectId = 13, - Gl1aPerfSelId = 14, - Gl1cPerfSelId = 15, - Gl2aPerfSelId = 17, - Gl2cPerfSelId = 18, - GrbmPerfSelId = 19, - GrbmSe0PerfSelId = 20, - GrbmSe1PerfSelId = 21, - GrbmSe2PerfSelId = 22, - GrbmSe3PerfSelId = 23, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - GrbmSe4PerfSelId = 24, - GrbmSe5PerfSelId = 25, - GrbmSe6PerfSelId = 26, - GrbmSe7PerfSelId = 27, -#endif - IaPerfcountSelectId = 28, - PerfcounterValsId = 29, - PhPerfcntSelId = 30, - RMIPerfSelId = 31, - RlcPerfcounterSelId = 32, - ScPerfcntSelId = 33, - SdmaPerfSelId = 34, - SpiPerfcntSelId = 35, - SqPerfSelId = 36, -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - SqgPerfSelId = 37, -#endif - SuPerfcntSelId = 38, - SxPerfcounterValsId = 39, - TaPerfcountSelId = 40, - TcaPerfSelId = 41, - TccPerfSelId = 42, - TcpPerfcountSelectId = 43, - TdPerfcountSelId = 44, - UTCL1PerfSelId = 45, - UmcPerfcountSelectId = 46, - VgtPerfcountSelectId = 47, - WdPerfcountSelectId = 48, - MaxPerfCtrId = 49, -}; - -constexpr unsigned int Vg10MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - 
GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_0, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND__GFX09_10, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_CLIENT_UTCL1_INFLIGHT__GFX09_0, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_SEL_CLIENT127_REQ, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - UMC_PERF_SEL_BeqEdcErr__VG10, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int Vg12MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_1X, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_BACKEND_PRIM_FIFO_FULL__VG12_VG20_RN, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_SEL_CLIENT127_REQ, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - 0, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int 
Vg20MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_1X, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_BACKEND_PRIM_FIFO_FULL__VG12_VG20_RN, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_CLIENT_RETURN_BUBBLE__VG20, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - 0, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int Rv1xMaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__RV1X_RV2X, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_0, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND__GFX09_10, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - 
SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_CLIENT_UTCL1_INFLIGHT__GFX09_0, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_SEL_CLIENT127_REQ, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - 0, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int Rv2xMaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_BB_BLEND_PIXEL_VLD__GFX09_10, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__RV1X_RV2X, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_1X, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_DB1_TILE_INTERFACE_CREDIT_AT_MAX_WITH_NO_PENDING_SEND__GFX09_10, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_SEL_CLIENT127_REQ, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - 0, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int RnMaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_DCC_KEY_VALUE__CONST_CLEAR_AC11__RN, - 0, - 0, - 0, - CPC_PERF_SEL_ME2_DC1_SPI_BUSY__CORE, - CPF_PERF_SEL_CPF_UTCL2IU_STALL__GFX09, - CPG_PERF_SEL_CPG_UTCL2IU_STALL__VG10_VG12_VG20_RN, - 0, - 0, - GDS_PERF_SEL_GWS_BYPASS__GFX09, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - GRBM_PERF_SEL_CPAXI_BUSY, - 
GRBM_SE0_PERF_SEL_RMI_BUSY, - GRBM_SE1_PERF_SEL_RMI_BUSY, - GRBM_SE2_PERF_SEL_RMI_BUSY, - GRBM_SE3_PERF_SEL_RMI_BUSY, - 0, - 0, - 0, - 0, - ia_perf_utcl1_stall_utcl2_event__GFX09_1X, - DB_PERF_SEL_DB_SC_quad_quads_with_4_pixels__GFX09, - 0, - RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3__GFX09, - 0, - SC_BACKEND_PRIM_FIFO_FULL__VG12_VG20_RN, - SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX09, - SPI_PERF_VWC_CSC_WR__GFX09, - SQC_PERF_SEL_DUMMY_LAST__GFX09, - 0, - PERF_PA_PRIMIC_TO_CLPRIM_FIFO_FULL__GFX09_1X, - SX_PERF_SEL_DB3_SIZE__GFX09_10, - TA_PERF_SEL_first_xnack_on_phase3__GFX09, - TCA_PERF_SEL_CROSSBAR_STALL_TCC7, - TCC_PERF_SEL_CLIENT127_REQ, - TCP_PERF_SEL_TCC_DCC_REQ__GFX09, - TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt__GFX09, - 0, - 0, - vgt_perf_sclk_te11_vld, - wd_perf_utcl1_stall_utcl2_event, -}; - -constexpr unsigned int Nv10MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__GFX101, - CHC_PERF_SEL_REQ_CLIENT14, - CHCG_PERF_SEL_REQ_CLIENT14__GFX101, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - 0, - GCR_PERF_SEL_UTCL2_FILTERED_RET__GFX10COREPLUS, - GDS_PERF_SEL_GWS_BYPASS__GFX101, - 0, - 0, - 0, - vgt_se3pa1_clips_starved_busy__GFX101, - GL1A_PERF_SEL_CYCLE__GFX101, - GL1C_PERF_SEL_REQ_CLIENT27__GFX101, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_STALL__GFX101, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_FG_LOB_FWDR_TIMEOUT_hits__GFX101, - PH_PERF_SEL_SC7_PA7_DEALLOC_4_0_RD__GFX10, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_PK_PM_MAX_REZ_CNT_FORCE_EOV_WAVE_BRK_1H__GFX10PLUS, - 
SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER__GFX10CORE, - SPI_PERF_LS_PERS_UPD_FULL1__GFX101, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_ENGG_POS_REQ_STALLED_BY_FULL_CLIPV_FIFO__GFX101, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_nosample_path_cycles__GFX101, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX101, - TD_PERF_SEL_nofilter_popcount_dmask_lt_num_comp_of_fmt__GFX101, - UTCL1_PERF_SEL_RANGE_INV_REQS__GFX101, - UMC_PERF_SEL_RefreshType__GFX101, - 0, - 0, -}; - -#if CHIP_HDR_NAVI21 -constexpr unsigned int Nv21MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__NV21, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - CHCG_PERF_SEL_REQ_CLIENT19__NV21, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV21, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV21, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - 
SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX103DERIVATIVE, - UMC_PERF_SEL_TempCnt__NV21, - 0, - 0, -}; -#endif - -#if CHIP_HDR_NAVI22 -constexpr unsigned int Nv22MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__NV22, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - CHCG_PERF_SEL_REQ_CLIENT19__NV22, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV22, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV22, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - 
PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_INV_ALL_VMID_INVREQS__GFX103DERIVATIVE, - UMC_PERF_SEL_TempCnt__NV22, - 0, - 0, -}; -#endif - -#if CHIP_HDR_NAVI23 -constexpr unsigned int Nv23MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__NV23, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - CHCG_PERF_SEL_REQ_CLIENT19__NV23, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV23, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV23, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - 
SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV23, - UMC_PERF_SEL_TempCnt__NV23, - 0, - 0, -}; -#endif - -#if CHIP_HDR_NAVI24 -constexpr unsigned int Nv24MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__NV24, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - CHCG_PERF_SEL_REQ_CLIENT19__NV24, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV24, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__NV24, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - 
TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__NV24, - UMC_PERF_SEL_TempCnt__NV24, - 0, - 0, -}; -#endif - -constexpr unsigned int RembrandtMaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__REMBRANDT, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - 0, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - 0, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__REMBRANDT, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - 
TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__REMBRANDT, - 0, - 0, - 0, -}; - -#if CHIP_HDR_NAVI31 -constexpr unsigned int Nv31MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11, - CHA_PERF_SEL_CYCLE__NV3X, - CHC_PERF_SEL_REQ_CLIENT23__GFX11, - CHCG_PERF_SEL_REQ_CLIENT23__NV3X, - CPC_PERF_SEL_MEC_THREAD3__GFX11, - CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV3X, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__NV3X, - GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_agm_gcr_combine__GFX11, - ge_ngg_busy_base__GFX11, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11, - 0, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11, - GRBM_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE0_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE1_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE2_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE3_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE4_PERF_SEL_PC_BUSY, - GRBM_SE5_PERF_SEL_PC_BUSY, - GRBM_SE6_PERF_SEL_PC_BUSY, - GRBM_SE7_PERF_SEL_PC_BUSY, - 0, - DB_PERF_SEL_OREO_Events_stalls__GFX11, - PH_PERF_SC7_FIFO_STATUS_3__GFX11, - RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_SPI_WAVE_STALLED_BY_SPI__GFX11, - SDMA_PERF_SEL_QUEUE7_SELECT__GFX11, - SPI_PERF_BUSY__GFX11, - SQ_PERF_SEL_NONE2__GFX104PLUS, - SQG_PERF_SEL_DUMMY_LAST, - PERF_PA_BUSY__GFX11, - SX_PERF_SEL_DB3_4X2_DISCARD__GFX11, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11, - TD_PERF_SEL_store_preempts_a_load__GFX11, - UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11, - UMC_PERF_SEL_ClockCount__NV3X, - 0, - 0, -}; -#endif - -#if CHIP_HDR_NAVI32 -constexpr unsigned int 
Nv32MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11, - CHA_PERF_SEL_CYCLE__NV3X, - CHC_PERF_SEL_REQ_CLIENT23__GFX11, - CHCG_PERF_SEL_REQ_CLIENT23__NV3X, - CPC_PERF_SEL_MEC_THREAD3__GFX11, - CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV3X, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__NV3X, - GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_agm_gcr_combine__GFX11, - ge_ngg_busy_base__GFX11, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11, - 0, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11, - GRBM_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE0_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE1_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE2_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE3_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE4_PERF_SEL_PC_BUSY, - GRBM_SE5_PERF_SEL_PC_BUSY, - GRBM_SE6_PERF_SEL_PC_BUSY, - GRBM_SE7_PERF_SEL_PC_BUSY, - 0, - DB_PERF_SEL_OREO_Events_stalls__GFX11, - PH_PERF_SC7_FIFO_STATUS_3__GFX11, - RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_VRC_REPROBE_FULL__NV32, - SDMA_PERF_SEL_QUEUE7_SELECT__GFX11, - SPI_PERF_BUSY__GFX11, - SQ_PERF_SEL_NONE2__GFX104PLUS, - SQG_PERF_SEL_DUMMY_LAST, - PERF_PA_BUSY__GFX11, - SX_PERF_SEL_DB3_4X2_DISCARD__GFX11, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11, - TD_PERF_SEL_store_preempts_a_load__GFX11, - UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11, - UMC_PERF_SEL_ClockCount__NV3X, - 0, - 0, -}; -#endif - -#if CHIP_HDR_NAVI33 -constexpr unsigned int Nv33MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11, - CHA_PERF_SEL_CYCLE__NV3X, - CHC_PERF_SEL_REQ_CLIENT23__GFX11, - CHCG_PERF_SEL_REQ_CLIENT23__NV3X, - 
CPC_PERF_SEL_MEC_THREAD3__GFX11, - CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11, - DF_MALL_PERF_SEL_MALL_SDP_LAT_HIST_GT1000__NV3X, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__NV3X, - GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_agm_gcr_combine__GFX11, - ge_ngg_busy_base__GFX11, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11, - 0, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11, - GRBM_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE0_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE1_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE2_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE3_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE4_PERF_SEL_PC_BUSY, - GRBM_SE5_PERF_SEL_PC_BUSY, - GRBM_SE6_PERF_SEL_PC_BUSY, - GRBM_SE7_PERF_SEL_PC_BUSY, - 0, - DB_PERF_SEL_OREO_Events_stalls__GFX11, - PH_PERF_SC7_FIFO_STATUS_3__GFX11, - RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_VRC_REPROBE_FULL__NV33, - SDMA_PERF_SEL_QUEUE7_SELECT__GFX11, - SPI_PERF_BUSY__GFX11, - SQ_PERF_SEL_NONE2__GFX104PLUS, - SQG_PERF_SEL_DUMMY_LAST, - PERF_PA_BUSY__GFX11, - SX_PERF_SEL_DB3_4X2_DISCARD__GFX11, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11, - TD_PERF_SEL_store_preempts_a_load__GFX11, - UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11, - UMC_PERF_SEL_ClockCount__NV3X, - 0, - 0, -}; -#endif - -constexpr unsigned int RaphaelMaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_CC_CACHE_SECTOR_HIT__GFX10CORE, - CHA_PERF_SEL_CYCLE__RAPHAEL, - CHC_PERF_SEL_REQ_CLIENT19__GFX103PLUSEXCLUSIVE, - 0, - CPC_PERF_SEL_MES_THREAD1__GFX10COREPLUS, - CPF_PERF_SEL_CPF_UTCL2IU_XNACK__GFX10COREPLUS, - CPG_PERF_SEL_DMA_FETCHER_STALLED_ON_ROQ_FULL__GFX10COREPLUS, - 0, - GCR_PERF_SEL_PIO_TCP_TLB_SHOOTDOWN_REQ__RAPHAEL, - GDS_PERF_SEL_GWS_BYPASS__GFX103, - 
ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_tf_ret_data_stalling_hs_done__GFX103DERIVATIVE, - ge_hs_stall_tfmm_fifo_full__GFX103, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX103DERIVATIVE, - 0, - GL2A_PERF_SEL_REQ_BURST_CLIENT15, - GL2C_PERF_SEL_CM_DCC_OUT_CONST__GFX103DERIVATIVE, - GRBM_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE0_PERF_SEL_GL1CC_BUSY__GFX10PLUS, - GRBM_SE1_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE2_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - GRBM_SE3_PERF_SEL_GL1CC_BUSY__GFX10COREPLUS, - 0, - 0, - 0, - 0, - 0, - DB_PERF_SEL_postz_ps_invoked_pixel_cnt__GFX103, - PH_PERF_SEL_8_SC_ARB_STARVED_FROM_ABOVE_WITH_UNSELECTED_FIFO_FULL__GFX103PLUSEXCLUSIVE, - RMI_PERF_SEL_UTCL0_UTCL1_PERM_FAULT__GFX10CORE, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_BM_MULTI_ACCUM_4_BE_STALLED__GFX103PLUSEXCLUSIVE, - SDMA_PERF_SEL_CH_CE_RDRET_VALID__GFX103, - SPI_PERF_EXP_THROT_CAUSALITY_DETECTED__GFX103, - SP_PERF_SEL_DUMMY_LAST__GFX10CORE, - 0, - PERF_OUTPUT_PRIM_4_SC__GFX103PLUSEXCLUSIVE, - SX_PERF_SEL_RB3_STALL_DUE_TO_ORDERING__GFX10, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BACK_COMPAT_SWITCH__GFX103, - TD_PERF_SEL_input_bp_due_to_done_scoreboard_full__GFX103PLUSEXCLUSIVE, - UTCL1_PERF_SEL_UTCL2_REQS_OUTSTANDING_ACCUM__RAPHAEL, - 0, - 0, - 0, -}; - -#if CHIP_HDR_PHOENIX1 -constexpr unsigned int Phx1MaxPerfEventIds[MaxPerfCtrId] = -{ - CB_PERF_SEL_EXPORT_KILLED_BY_NULL_TARGET_SHADER_MASK__GFX11, - CHA_PERF_SEL_CYCLE__APU11, - CHC_PERF_SEL_REQ_CLIENT23__GFX11, - 0, - CPC_PERF_SEL_MEC_THREAD3__GFX11, - CPF_PERF_SEL_CP_SDMA_MNGR_SDMABUSY__GFX11, - CPG_PERF_SEL_PFP_VGTDMA_DB_ROQ_DATA_STALL1__GFX11, - 0, - GCR_PERF_SEL_PIO_GL1_TLB_SHOOTDOWN_REQ__APU11, - GDS_PERF_SEL_SE7_GS_WAVE_ID_VALID__GFX11, - ge1_rbiu_dr_fifo_starved_p1__GFX103COREPLUS, - ge_agm_gcr_combine__GFX11, - ge_ngg_busy_base__GFX11, - 0, - GL1A_PERF_SEL_CYCLE__GFX103PLUSEXCLUSIVE, - 
GL1C_PERF_SEL_UTCL0_UTCL1_XNACK_NO_RETRY_FAULT__GFX11, - 0, - GL2A_PERF_SEL_RTN_CREDIT_STALL_CLIENT15__GFX104PLUS, - GL2C_PERF_SEL_HIT_PASS_MISS_IN_CLIENT19__GFX11, - GRBM_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE0_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE1_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE2_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE3_PERF_SEL_PC_BUSY__GFX11, - GRBM_SE4_PERF_SEL_PC_BUSY, - GRBM_SE5_PERF_SEL_PC_BUSY, - GRBM_SE6_PERF_SEL_PC_BUSY, - GRBM_SE7_PERF_SEL_PC_BUSY, - 0, - DB_PERF_SEL_OREO_Events_stalls__GFX11, - PH_PERF_SC7_FIFO_STATUS_3__GFX11, - RMI_PERF_SEL_CONSUMER_PROBEGEN_DB_RTS_RTR__GFX11, - RLC_PERF_SEL_SERDES_COMMAND_WRITE, - SC_VRC_REPROBE_FULL__APU11, - SDMA_PERF_SEL_QUEUE7_SELECT__GFX11, - SPI_PERF_BUSY__GFX11, - SQ_PERF_SEL_NONE2__GFX104PLUS, - SQG_PERF_SEL_DUMMY_LAST, - PERF_PA_BUSY__GFX11, - SX_PERF_SEL_DB3_4X2_DISCARD__GFX11, - TA_PERF_SEL_tcreq_clk_valid_cycles__GFX103PLUSEXCLUSIVE, - 0, - 0, - TCP_PERF_SEL_BURST_BIN_READHIT_gt16__GFX11, - TD_PERF_SEL_store_preempts_a_load__GFX11, - UTCL1_PERF_SEL_ALOG_STALL_PMM_CREDITS__GFX11, - 0, - 0, - 0, -}; -#endif - -} // inline namespace Chip -} // namespace Gfx9 -} // namespace Pal diff --git a/lgc/imported/chip/gfx9/gfx9_plus_merged_offset.h b/lgc/imported/chip/gfx9/gfx9_plus_merged_offset.h deleted file mode 100644 index 1fe01c64b6..0000000000 --- a/lgc/imported/chip/gfx9/gfx9_plus_merged_offset.h +++ /dev/null @@ -1,6800 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ - -#pragma once - -namespace Pal -{ -namespace Gfx9 -{ -inline namespace Chip -{ -constexpr unsigned int mmCB_BLEND0_CONTROL = 0xA1E0; -constexpr unsigned int mmCB_BLEND1_CONTROL = 0xA1E1; -constexpr unsigned int mmCB_BLEND2_CONTROL = 0xA1E2; -constexpr unsigned int mmCB_BLEND3_CONTROL = 0xA1E3; -constexpr unsigned int mmCB_BLEND4_CONTROL = 0xA1E4; -constexpr unsigned int mmCB_BLEND5_CONTROL = 0xA1E5; -constexpr unsigned int mmCB_BLEND6_CONTROL = 0xA1E6; -constexpr unsigned int mmCB_BLEND7_CONTROL = 0xA1E7; -constexpr unsigned int mmCB_BLEND_ALPHA = 0xA108; -constexpr unsigned int mmCB_BLEND_BLUE = 0xA107; -constexpr unsigned int mmCB_BLEND_GREEN = 0xA106; -constexpr unsigned int mmCB_BLEND_RED = 0xA105; -constexpr unsigned int mmCB_CGTT_SCLK_CTRL = 0xF0A8; -constexpr unsigned int mmCB_COLOR0_ATTRIB = 0xA31D; -constexpr unsigned int mmCB_COLOR0_BASE = 0xA318; -constexpr unsigned int mmCB_COLOR0_DCC_BASE = 0xA325; -constexpr unsigned int mmCB_COLOR0_DCC_CONTROL = 0xA31E; -constexpr unsigned int mmCB_COLOR0_INFO = 0xA31C; -constexpr unsigned int mmCB_COLOR0_VIEW = 0xA31B; -constexpr unsigned int mmCB_COLOR1_ATTRIB = 0xA32C; -constexpr unsigned int mmCB_COLOR1_BASE = 0xA327; -constexpr unsigned int mmCB_COLOR1_DCC_BASE = 0xA334; -constexpr unsigned int mmCB_COLOR1_DCC_CONTROL = 0xA32D; -constexpr unsigned int mmCB_COLOR1_INFO = 0xA32B; -constexpr unsigned int mmCB_COLOR1_VIEW = 0xA32A; -constexpr unsigned int mmCB_COLOR2_ATTRIB = 0xA33B; -constexpr unsigned int mmCB_COLOR2_BASE = 0xA336; -constexpr unsigned int mmCB_COLOR2_DCC_BASE = 0xA343; -constexpr unsigned int mmCB_COLOR2_DCC_CONTROL = 0xA33C; -constexpr unsigned int mmCB_COLOR2_INFO = 0xA33A; -constexpr unsigned int mmCB_COLOR2_VIEW = 0xA339; -constexpr unsigned int mmCB_COLOR3_ATTRIB = 0xA34A; -constexpr unsigned int mmCB_COLOR3_BASE = 0xA345; -constexpr unsigned int mmCB_COLOR3_DCC_BASE 
= 0xA352; -constexpr unsigned int mmCB_COLOR3_DCC_CONTROL = 0xA34B; -constexpr unsigned int mmCB_COLOR3_INFO = 0xA349; -constexpr unsigned int mmCB_COLOR3_VIEW = 0xA348; -constexpr unsigned int mmCB_COLOR4_ATTRIB = 0xA359; -constexpr unsigned int mmCB_COLOR4_BASE = 0xA354; -constexpr unsigned int mmCB_COLOR4_DCC_BASE = 0xA361; -constexpr unsigned int mmCB_COLOR4_DCC_CONTROL = 0xA35A; -constexpr unsigned int mmCB_COLOR4_INFO = 0xA358; -constexpr unsigned int mmCB_COLOR4_VIEW = 0xA357; -constexpr unsigned int mmCB_COLOR5_ATTRIB = 0xA368; -constexpr unsigned int mmCB_COLOR5_BASE = 0xA363; -constexpr unsigned int mmCB_COLOR5_DCC_BASE = 0xA370; -constexpr unsigned int mmCB_COLOR5_DCC_CONTROL = 0xA369; -constexpr unsigned int mmCB_COLOR5_INFO = 0xA367; -constexpr unsigned int mmCB_COLOR5_VIEW = 0xA366; -constexpr unsigned int mmCB_COLOR6_ATTRIB = 0xA377; -constexpr unsigned int mmCB_COLOR6_BASE = 0xA372; -constexpr unsigned int mmCB_COLOR6_DCC_BASE = 0xA37F; -constexpr unsigned int mmCB_COLOR6_DCC_CONTROL = 0xA378; -constexpr unsigned int mmCB_COLOR6_INFO = 0xA376; -constexpr unsigned int mmCB_COLOR6_VIEW = 0xA375; -constexpr unsigned int mmCB_COLOR7_ATTRIB = 0xA386; -constexpr unsigned int mmCB_COLOR7_BASE = 0xA381; -constexpr unsigned int mmCB_COLOR7_DCC_BASE = 0xA38E; -constexpr unsigned int mmCB_COLOR7_DCC_CONTROL = 0xA387; -constexpr unsigned int mmCB_COLOR7_INFO = 0xA385; -constexpr unsigned int mmCB_COLOR7_VIEW = 0xA384; -constexpr unsigned int mmCB_COLOR_CONTROL = 0xA202; -constexpr unsigned int mmCB_HW_CONTROL_3 = 0x2683; -constexpr unsigned int mmCB_PERFCOUNTER0_HI = 0xD407; -constexpr unsigned int mmCB_PERFCOUNTER0_LO = 0xD406; -constexpr unsigned int mmCB_PERFCOUNTER0_SELECT = 0xDC01; -constexpr unsigned int mmCB_PERFCOUNTER0_SELECT1 = 0xDC02; -constexpr unsigned int mmCB_PERFCOUNTER1_HI = 0xD409; -constexpr unsigned int mmCB_PERFCOUNTER1_LO = 0xD408; -constexpr unsigned int mmCB_PERFCOUNTER1_SELECT = 0xDC03; -constexpr unsigned int mmCB_PERFCOUNTER2_HI = 
0xD40B; -constexpr unsigned int mmCB_PERFCOUNTER2_LO = 0xD40A; -constexpr unsigned int mmCB_PERFCOUNTER2_SELECT = 0xDC04; -constexpr unsigned int mmCB_PERFCOUNTER3_HI = 0xD40D; -constexpr unsigned int mmCB_PERFCOUNTER3_LO = 0xD40C; -constexpr unsigned int mmCB_PERFCOUNTER3_SELECT = 0xDC05; -constexpr unsigned int mmCB_PERFCOUNTER_FILTER = 0xDC00; -constexpr unsigned int mmCB_SHADER_MASK = 0xA08F; -constexpr unsigned int mmCB_TARGET_MASK = 0xA08E; -constexpr unsigned int mmCOHER_DEST_BASE_0 = 0xA092; -constexpr unsigned int mmCOHER_DEST_BASE_1 = 0xA093; -constexpr unsigned int mmCOHER_DEST_BASE_2 = 0xA07E; -constexpr unsigned int mmCOHER_DEST_BASE_3 = 0xA07F; -constexpr unsigned int mmCOHER_DEST_BASE_HI_0 = 0xA07A; -constexpr unsigned int mmCOHER_DEST_BASE_HI_1 = 0xA07B; -constexpr unsigned int mmCOHER_DEST_BASE_HI_2 = 0xA07C; -constexpr unsigned int mmCOHER_DEST_BASE_HI_3 = 0xA07D; -constexpr unsigned int mmCOMPUTE_DIM_X = 0x2E01; -constexpr unsigned int mmCOMPUTE_DIM_Y = 0x2E02; -constexpr unsigned int mmCOMPUTE_DIM_Z = 0x2E03; -constexpr unsigned int mmCOMPUTE_DISPATCH_ID = 0x2E20; -constexpr unsigned int mmCOMPUTE_DISPATCH_INITIATOR = 0x2E00; -constexpr unsigned int mmCOMPUTE_DISPATCH_PKT_ADDR_HI = 0x2E0F; -constexpr unsigned int mmCOMPUTE_DISPATCH_PKT_ADDR_LO = 0x2E0E; -constexpr unsigned int mmCOMPUTE_DISPATCH_SCRATCH_BASE_HI = 0x2E11; -constexpr unsigned int mmCOMPUTE_DISPATCH_SCRATCH_BASE_LO = 0x2E10; -constexpr unsigned int mmCOMPUTE_MISC_RESERVED = 0x2E1F; -constexpr unsigned int mmCOMPUTE_NOWHERE = 0x2E7F; -constexpr unsigned int mmCOMPUTE_NUM_THREAD_X = 0x2E07; -constexpr unsigned int mmCOMPUTE_NUM_THREAD_Y = 0x2E08; -constexpr unsigned int mmCOMPUTE_NUM_THREAD_Z = 0x2E09; -constexpr unsigned int mmCOMPUTE_PERFCOUNT_ENABLE = 0x2E0B; -constexpr unsigned int mmCOMPUTE_PGM_HI = 0x2E0D; -constexpr unsigned int mmCOMPUTE_PGM_LO = 0x2E0C; -constexpr unsigned int mmCOMPUTE_PGM_RSRC1 = 0x2E12; -constexpr unsigned int mmCOMPUTE_PGM_RSRC2 = 0x2E13; -constexpr 
unsigned int mmCOMPUTE_PIPELINESTAT_ENABLE = 0x2E0A; -constexpr unsigned int mmCOMPUTE_RESOURCE_LIMITS = 0x2E15; -constexpr unsigned int mmCOMPUTE_RESTART_X = 0x2E1B; -constexpr unsigned int mmCOMPUTE_RESTART_Y = 0x2E1C; -constexpr unsigned int mmCOMPUTE_RESTART_Z = 0x2E1D; -constexpr unsigned int mmCOMPUTE_START_X = 0x2E04; -constexpr unsigned int mmCOMPUTE_START_Y = 0x2E05; -constexpr unsigned int mmCOMPUTE_START_Z = 0x2E06; -constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE0 = 0x2E16; -constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE1 = 0x2E17; -constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE2 = 0x2E19; -constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE3 = 0x2E1A; -constexpr unsigned int mmCOMPUTE_THREADGROUP_ID = 0x2E21; -constexpr unsigned int mmCOMPUTE_THREAD_TRACE_ENABLE = 0x2E1E; -constexpr unsigned int mmCOMPUTE_TMPRING_SIZE = 0x2E18; -constexpr unsigned int mmCOMPUTE_USER_DATA_0 = 0x2E40; -constexpr unsigned int mmCOMPUTE_USER_DATA_1 = 0x2E41; -constexpr unsigned int mmCOMPUTE_USER_DATA_2 = 0x2E42; -constexpr unsigned int mmCOMPUTE_USER_DATA_3 = 0x2E43; -constexpr unsigned int mmCOMPUTE_USER_DATA_4 = 0x2E44; -constexpr unsigned int mmCOMPUTE_USER_DATA_5 = 0x2E45; -constexpr unsigned int mmCOMPUTE_USER_DATA_6 = 0x2E46; -constexpr unsigned int mmCOMPUTE_USER_DATA_7 = 0x2E47; -constexpr unsigned int mmCOMPUTE_USER_DATA_8 = 0x2E48; -constexpr unsigned int mmCOMPUTE_USER_DATA_9 = 0x2E49; -constexpr unsigned int mmCOMPUTE_USER_DATA_10 = 0x2E4A; -constexpr unsigned int mmCOMPUTE_USER_DATA_11 = 0x2E4B; -constexpr unsigned int mmCOMPUTE_USER_DATA_12 = 0x2E4C; -constexpr unsigned int mmCOMPUTE_USER_DATA_13 = 0x2E4D; -constexpr unsigned int mmCOMPUTE_USER_DATA_14 = 0x2E4E; -constexpr unsigned int mmCOMPUTE_USER_DATA_15 = 0x2E4F; -constexpr unsigned int mmCOMPUTE_VMID = 0x2E14; -constexpr unsigned int mmCPC_PERFCOUNTER0_HI = 0xD007; -constexpr unsigned int mmCPC_PERFCOUNTER0_LO = 0xD006; -constexpr unsigned int mmCPC_PERFCOUNTER0_SELECT = 
0xD809; -constexpr unsigned int mmCPC_PERFCOUNTER0_SELECT1 = 0xD804; -constexpr unsigned int mmCPC_PERFCOUNTER1_HI = 0xD005; -constexpr unsigned int mmCPC_PERFCOUNTER1_LO = 0xD004; -constexpr unsigned int mmCPC_PERFCOUNTER1_SELECT = 0xD803; -constexpr unsigned int mmCPF_PERFCOUNTER0_HI = 0xD00B; -constexpr unsigned int mmCPF_PERFCOUNTER0_LO = 0xD00A; -constexpr unsigned int mmCPF_PERFCOUNTER0_SELECT = 0xD807; -constexpr unsigned int mmCPF_PERFCOUNTER0_SELECT1 = 0xD806; -constexpr unsigned int mmCPF_PERFCOUNTER1_HI = 0xD009; -constexpr unsigned int mmCPF_PERFCOUNTER1_LO = 0xD008; -constexpr unsigned int mmCPF_PERFCOUNTER1_SELECT = 0xD805; -constexpr unsigned int mmCPG_PERFCOUNTER0_HI = 0xD003; -constexpr unsigned int mmCPG_PERFCOUNTER0_LO = 0xD002; -constexpr unsigned int mmCPG_PERFCOUNTER0_SELECT = 0xD802; -constexpr unsigned int mmCPG_PERFCOUNTER0_SELECT1 = 0xD801; -constexpr unsigned int mmCPG_PERFCOUNTER1_HI = 0xD001; -constexpr unsigned int mmCPG_PERFCOUNTER1_LO = 0xD000; -constexpr unsigned int mmCPG_PERFCOUNTER1_SELECT = 0xD800; -constexpr unsigned int mmCP_ME_COHER_BASE = 0xC101; -constexpr unsigned int mmCP_ME_COHER_BASE_HI = 0xC102; -constexpr unsigned int mmCP_ME_COHER_CNTL = 0xC0FE; -constexpr unsigned int mmCP_ME_COHER_SIZE = 0xC0FF; -constexpr unsigned int mmCP_ME_COHER_SIZE_HI = 0xC100; -constexpr unsigned int mmCP_ME_COHER_STATUS = 0xC103; -constexpr unsigned int mmCP_PERFMON_CNTL = 0xD808; -constexpr unsigned int mmCP_PERFMON_CNTX_CNTL = 0xA0D8; -constexpr unsigned int mmCP_SC_PSINVOC_COUNT0_HI = 0xC02D; -constexpr unsigned int mmCP_SC_PSINVOC_COUNT0_LO = 0xC02C; -constexpr unsigned int mmCP_SC_PSINVOC_COUNT1_HI = 0xC02F; -constexpr unsigned int mmCP_SC_PSINVOC_COUNT1_LO = 0xC02E; -constexpr unsigned int mmCP_VGT_CSINVOC_COUNT_HI = 0xC031; -constexpr unsigned int mmCP_VGT_CSINVOC_COUNT_LO = 0xC030; -constexpr unsigned int mmCP_VGT_DSINVOC_COUNT_HI = 0xC027; -constexpr unsigned int mmCP_VGT_DSINVOC_COUNT_LO = 0xC026; -constexpr unsigned int 
mmCP_VGT_GSINVOC_COUNT_HI = 0xC023; -constexpr unsigned int mmCP_VGT_GSINVOC_COUNT_LO = 0xC022; -constexpr unsigned int mmCP_VGT_GSPRIM_COUNT_HI = 0xC01F; -constexpr unsigned int mmCP_VGT_GSPRIM_COUNT_LO = 0xC01E; -constexpr unsigned int mmCP_VGT_HSINVOC_COUNT_HI = 0xC025; -constexpr unsigned int mmCP_VGT_HSINVOC_COUNT_LO = 0xC024; -constexpr unsigned int mmCP_VGT_IAPRIM_COUNT_HI = 0xC01D; -constexpr unsigned int mmCP_VGT_IAPRIM_COUNT_LO = 0xC01C; -constexpr unsigned int mmCP_VGT_IAVERT_COUNT_HI = 0xC01B; -constexpr unsigned int mmCP_VGT_IAVERT_COUNT_LO = 0xC01A; -constexpr unsigned int mmCP_VGT_VSINVOC_COUNT_HI = 0xC021; -constexpr unsigned int mmCP_VGT_VSINVOC_COUNT_LO = 0xC020; -constexpr unsigned int mmDB_ALPHA_TO_MASK = 0xA2DC; -constexpr unsigned int mmDB_CGTT_CLK_CTRL_0 = 0xF0A4; -constexpr unsigned int mmDB_COUNT_CONTROL = 0xA001; -constexpr unsigned int mmDB_CREDIT_LIMIT = 0x2614; -constexpr unsigned int mmDB_DEPTH_BOUNDS_MAX = 0xA009; -constexpr unsigned int mmDB_DEPTH_BOUNDS_MIN = 0xA008; -constexpr unsigned int mmDB_DEPTH_CLEAR = 0xA00B; -constexpr unsigned int mmDB_DEPTH_CONTROL = 0xA200; -constexpr unsigned int mmDB_DEPTH_VIEW = 0xA002; -constexpr unsigned int mmDB_EQAA = 0xA201; -constexpr unsigned int mmDB_FIFO_DEPTH1 = 0x2618; -constexpr unsigned int mmDB_FIFO_DEPTH2 = 0x2619; -constexpr unsigned int mmDB_FREE_CACHELINES = 0x2617; -constexpr unsigned int mmDB_HTILE_DATA_BASE = 0xA005; -constexpr unsigned int mmDB_HTILE_SURFACE = 0xA2AF; -constexpr unsigned int mmDB_MEM_ARB_WATERMARKS = 0x261C; -constexpr unsigned int mmDB_OCCLUSION_COUNT0_HI = 0xC3C1; -constexpr unsigned int mmDB_OCCLUSION_COUNT0_LOW = 0xC3C0; -constexpr unsigned int mmDB_OCCLUSION_COUNT1_HI = 0xC3C3; -constexpr unsigned int mmDB_OCCLUSION_COUNT1_LOW = 0xC3C2; -constexpr unsigned int mmDB_OCCLUSION_COUNT2_HI = 0xC3C5; -constexpr unsigned int mmDB_OCCLUSION_COUNT2_LOW = 0xC3C4; -constexpr unsigned int mmDB_OCCLUSION_COUNT3_HI = 0xC3C7; -constexpr unsigned int 
mmDB_OCCLUSION_COUNT3_LOW = 0xC3C6; -constexpr unsigned int mmDB_PERFCOUNTER0_HI = 0xD441; -constexpr unsigned int mmDB_PERFCOUNTER0_LO = 0xD440; -constexpr unsigned int mmDB_PERFCOUNTER0_SELECT = 0xDC40; -constexpr unsigned int mmDB_PERFCOUNTER0_SELECT1 = 0xDC41; -constexpr unsigned int mmDB_PERFCOUNTER1_HI = 0xD443; -constexpr unsigned int mmDB_PERFCOUNTER1_LO = 0xD442; -constexpr unsigned int mmDB_PERFCOUNTER1_SELECT = 0xDC42; -constexpr unsigned int mmDB_PERFCOUNTER1_SELECT1 = 0xDC43; -constexpr unsigned int mmDB_PERFCOUNTER2_HI = 0xD445; -constexpr unsigned int mmDB_PERFCOUNTER2_LO = 0xD444; -constexpr unsigned int mmDB_PERFCOUNTER2_SELECT = 0xDC44; -constexpr unsigned int mmDB_PERFCOUNTER3_HI = 0xD447; -constexpr unsigned int mmDB_PERFCOUNTER3_LO = 0xD446; -constexpr unsigned int mmDB_PERFCOUNTER3_SELECT = 0xDC46; -constexpr unsigned int mmDB_PRELOAD_CONTROL = 0xA2B2; -constexpr unsigned int mmDB_RENDER_CONTROL = 0xA000; -constexpr unsigned int mmDB_RENDER_OVERRIDE = 0xA003; -constexpr unsigned int mmDB_RENDER_OVERRIDE2 = 0xA004; -constexpr unsigned int mmDB_SHADER_CONTROL = 0xA203; -constexpr unsigned int mmDB_SRESULTS_COMPARE_STATE0 = 0xA2B0; -constexpr unsigned int mmDB_SRESULTS_COMPARE_STATE1 = 0xA2B1; -constexpr unsigned int mmDB_STENCILREFMASK = 0xA10C; -constexpr unsigned int mmDB_STENCILREFMASK_BF = 0xA10D; -constexpr unsigned int mmDB_STENCIL_CLEAR = 0xA00A; -constexpr unsigned int mmDB_STENCIL_CONTROL = 0xA10B; -constexpr unsigned int mmDB_SUBTILE_CONTROL = 0x2616; -constexpr unsigned int mmDB_WATERMARKS = 0x2615; -constexpr unsigned int mmDB_Z_WRITE_BASE = 0xA014; -constexpr unsigned int mmGB_ADDR_CONFIG = 0x263E; -constexpr unsigned int mmGB_ADDR_CONFIG_READ = 0x2642; -constexpr unsigned int mmGRBM_CHIP_REVISION = 0x2021; -constexpr unsigned int mmGRBM_GFX_INDEX = 0xC200; -constexpr unsigned int mmGRBM_GFX_INDEX_SR_DATA = 0xFA01; -constexpr unsigned int mmGRBM_GFX_INDEX_SR_SELECT = 0xFA00; -constexpr unsigned int mmGRBM_PERFCOUNTER0_HI = 0xD041; 
-constexpr unsigned int mmGRBM_PERFCOUNTER0_LO = 0xD040; -constexpr unsigned int mmGRBM_PERFCOUNTER0_SELECT = 0xD840; -constexpr unsigned int mmGRBM_PERFCOUNTER1_HI = 0xD044; -constexpr unsigned int mmGRBM_PERFCOUNTER1_LO = 0xD043; -constexpr unsigned int mmGRBM_PERFCOUNTER1_SELECT = 0xD841; -constexpr unsigned int mmGRBM_SE0_PERFCOUNTER_HI = 0xD046; -constexpr unsigned int mmGRBM_SE0_PERFCOUNTER_LO = 0xD045; -constexpr unsigned int mmGRBM_SE0_PERFCOUNTER_SELECT = 0xD842; -constexpr unsigned int mmIA_ENHANCE = 0xA29C; -constexpr unsigned int mmIA_UTCL1_CNTL = 0x2246; -constexpr unsigned int mmIA_UTCL1_STATUS = 0x2247; -constexpr unsigned int mmPA_CL_CLIP_CNTL = 0xA204; -constexpr unsigned int mmPA_CL_CNTL_STATUS = 0x2284; -constexpr unsigned int mmPA_CL_ENHANCE = 0x2285; -constexpr unsigned int mmPA_CL_GB_HORZ_CLIP_ADJ = 0xA2FC; -constexpr unsigned int mmPA_CL_GB_HORZ_DISC_ADJ = 0xA2FD; -constexpr unsigned int mmPA_CL_GB_VERT_CLIP_ADJ = 0xA2FA; -constexpr unsigned int mmPA_CL_GB_VERT_DISC_ADJ = 0xA2FB; -constexpr unsigned int mmPA_CL_NANINF_CNTL = 0xA208; -constexpr unsigned int mmPA_CL_NGG_CNTL = 0xA20E; -constexpr unsigned int mmPA_CL_POINT_CULL_RAD = 0xA1F8; -constexpr unsigned int mmPA_CL_POINT_SIZE = 0xA1F7; -constexpr unsigned int mmPA_CL_POINT_X_RAD = 0xA1F5; -constexpr unsigned int mmPA_CL_POINT_Y_RAD = 0xA1F6; -constexpr unsigned int mmPA_CL_UCP_0_W = 0xA172; -constexpr unsigned int mmPA_CL_UCP_0_X = 0xA16F; -constexpr unsigned int mmPA_CL_UCP_0_Y = 0xA170; -constexpr unsigned int mmPA_CL_UCP_0_Z = 0xA171; -constexpr unsigned int mmPA_CL_UCP_1_W = 0xA176; -constexpr unsigned int mmPA_CL_UCP_1_X = 0xA173; -constexpr unsigned int mmPA_CL_UCP_1_Y = 0xA174; -constexpr unsigned int mmPA_CL_UCP_1_Z = 0xA175; -constexpr unsigned int mmPA_CL_UCP_2_W = 0xA17A; -constexpr unsigned int mmPA_CL_UCP_2_X = 0xA177; -constexpr unsigned int mmPA_CL_UCP_2_Y = 0xA178; -constexpr unsigned int mmPA_CL_UCP_2_Z = 0xA179; -constexpr unsigned int mmPA_CL_UCP_3_W = 0xA17E; 
-constexpr unsigned int mmPA_CL_UCP_3_X = 0xA17B; -constexpr unsigned int mmPA_CL_UCP_3_Y = 0xA17C; -constexpr unsigned int mmPA_CL_UCP_3_Z = 0xA17D; -constexpr unsigned int mmPA_CL_UCP_4_W = 0xA182; -constexpr unsigned int mmPA_CL_UCP_4_X = 0xA17F; -constexpr unsigned int mmPA_CL_UCP_4_Y = 0xA180; -constexpr unsigned int mmPA_CL_UCP_4_Z = 0xA181; -constexpr unsigned int mmPA_CL_UCP_5_W = 0xA186; -constexpr unsigned int mmPA_CL_UCP_5_X = 0xA183; -constexpr unsigned int mmPA_CL_UCP_5_Y = 0xA184; -constexpr unsigned int mmPA_CL_UCP_5_Z = 0xA185; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET = 0xA110; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_1 = 0xA116; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_2 = 0xA11C; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_3 = 0xA122; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_4 = 0xA128; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_5 = 0xA12E; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_6 = 0xA134; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_7 = 0xA13A; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_8 = 0xA140; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_9 = 0xA146; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_10 = 0xA14C; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_11 = 0xA152; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_12 = 0xA158; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_13 = 0xA15E; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_14 = 0xA164; -constexpr unsigned int mmPA_CL_VPORT_XOFFSET_15 = 0xA16A; -constexpr unsigned int mmPA_CL_VPORT_XSCALE = 0xA10F; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_1 = 0xA115; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_2 = 0xA11B; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_3 = 0xA121; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_4 = 0xA127; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_5 = 0xA12D; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_6 = 0xA133; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_7 = 0xA139; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_8 = 0xA13F; 
-constexpr unsigned int mmPA_CL_VPORT_XSCALE_9 = 0xA145; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_10 = 0xA14B; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_11 = 0xA151; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_12 = 0xA157; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_13 = 0xA15D; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_14 = 0xA163; -constexpr unsigned int mmPA_CL_VPORT_XSCALE_15 = 0xA169; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET = 0xA112; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_1 = 0xA118; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_2 = 0xA11E; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_3 = 0xA124; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_4 = 0xA12A; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_5 = 0xA130; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_6 = 0xA136; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_7 = 0xA13C; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_8 = 0xA142; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_9 = 0xA148; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_10 = 0xA14E; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_11 = 0xA154; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_12 = 0xA15A; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_13 = 0xA160; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_14 = 0xA166; -constexpr unsigned int mmPA_CL_VPORT_YOFFSET_15 = 0xA16C; -constexpr unsigned int mmPA_CL_VPORT_YSCALE = 0xA111; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_1 = 0xA117; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_2 = 0xA11D; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_3 = 0xA123; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_4 = 0xA129; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_5 = 0xA12F; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_6 = 0xA135; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_7 = 0xA13B; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_8 = 0xA141; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_9 = 0xA147; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_10 = 0xA14D; -constexpr unsigned int 
mmPA_CL_VPORT_YSCALE_11 = 0xA153; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_12 = 0xA159; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_13 = 0xA15F; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_14 = 0xA165; -constexpr unsigned int mmPA_CL_VPORT_YSCALE_15 = 0xA16B; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET = 0xA114; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_1 = 0xA11A; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_2 = 0xA120; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_3 = 0xA126; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_4 = 0xA12C; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_5 = 0xA132; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_6 = 0xA138; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_7 = 0xA13E; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_8 = 0xA144; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_9 = 0xA14A; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_10 = 0xA150; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_11 = 0xA156; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_12 = 0xA15C; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_13 = 0xA162; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_14 = 0xA168; -constexpr unsigned int mmPA_CL_VPORT_ZOFFSET_15 = 0xA16E; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE = 0xA113; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_1 = 0xA119; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_2 = 0xA11F; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_3 = 0xA125; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_4 = 0xA12B; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_5 = 0xA131; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_6 = 0xA137; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_7 = 0xA13D; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_8 = 0xA143; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_9 = 0xA149; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_10 = 0xA14F; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_11 = 0xA155; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_12 = 0xA15B; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_13 = 0xA161; 
-constexpr unsigned int mmPA_CL_VPORT_ZSCALE_14 = 0xA167; -constexpr unsigned int mmPA_CL_VPORT_ZSCALE_15 = 0xA16D; -constexpr unsigned int mmPA_CL_VS_OUT_CNTL = 0xA207; -constexpr unsigned int mmPA_CL_VTE_CNTL = 0xA206; -constexpr unsigned int mmPA_SC_AA_CONFIG = 0xA2F8; -constexpr unsigned int mmPA_SC_AA_MASK_X0Y0_X1Y0 = 0xA30E; -constexpr unsigned int mmPA_SC_AA_MASK_X0Y1_X1Y1 = 0xA30F; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 = 0xA2FE; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 = 0xA2FF; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 = 0xA300; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 = 0xA301; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 = 0xA306; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 = 0xA307; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 = 0xA308; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 = 0xA309; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 = 0xA302; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 = 0xA303; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 = 0xA304; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 = 0xA305; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 = 0xA30A; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 = 0xA30B; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 = 0xA30C; -constexpr unsigned int mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 = 0xA30D; -constexpr unsigned int mmPA_SC_BINNER_CNTL_0 = 0xA311; -constexpr unsigned int mmPA_SC_BINNER_CNTL_1 = 0xA312; -constexpr unsigned int mmPA_SC_CENTROID_PRIORITY_0 = 0xA2F5; -constexpr unsigned int mmPA_SC_CENTROID_PRIORITY_1 = 0xA2F6; -constexpr unsigned int mmPA_SC_CLIPRECT_0_BR = 0xA085; -constexpr unsigned int mmPA_SC_CLIPRECT_0_TL = 0xA084; -constexpr unsigned int mmPA_SC_CLIPRECT_1_BR = 0xA087; -constexpr unsigned int mmPA_SC_CLIPRECT_1_TL = 0xA086; -constexpr unsigned int 
mmPA_SC_CLIPRECT_2_BR = 0xA089; -constexpr unsigned int mmPA_SC_CLIPRECT_2_TL = 0xA088; -constexpr unsigned int mmPA_SC_CLIPRECT_3_BR = 0xA08B; -constexpr unsigned int mmPA_SC_CLIPRECT_3_TL = 0xA08A; -constexpr unsigned int mmPA_SC_CLIPRECT_RULE = 0xA083; -constexpr unsigned int mmPA_SC_CONSERVATIVE_RASTERIZATION_CNTL = 0xA313; -constexpr unsigned int mmPA_SC_EDGERULE = 0xA08C; -constexpr unsigned int mmPA_SC_FIFO_DEPTH_CNTL = 0x2295; -constexpr unsigned int mmPA_SC_GENERIC_SCISSOR_BR = 0xA091; -constexpr unsigned int mmPA_SC_GENERIC_SCISSOR_TL = 0xA090; -constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_COUNT = 0xC2AC; -constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_H = 0xC2A9; -constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_HV_EN = 0xC2A8; -constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_OCCURRENCE = 0xC2AB; -constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_V = 0xC2AA; -constexpr unsigned int mmPA_SC_LINE_CNTL = 0xA2F7; -constexpr unsigned int mmPA_SC_LINE_STIPPLE = 0xA283; -constexpr unsigned int mmPA_SC_LINE_STIPPLE_STATE = 0xC281; -constexpr unsigned int mmPA_SC_MODE_CNTL_0 = 0xA292; -constexpr unsigned int mmPA_SC_MODE_CNTL_1 = 0xA293; -constexpr unsigned int mmPA_SC_NGG_MODE_CNTL = 0xA314; -constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_COUNT = 0xC2A4; -constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_H = 0xC2A1; -constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_HV_EN = 0xC2A0; -constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_OCCURRENCE = 0xC2A3; -constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_V = 0xC2A2; -constexpr unsigned int mmPA_SC_PERFCOUNTER0_HI = 0xD141; -constexpr unsigned int mmPA_SC_PERFCOUNTER0_LO = 0xD140; -constexpr unsigned int mmPA_SC_PERFCOUNTER0_SELECT = 0xD940; -constexpr unsigned int mmPA_SC_PERFCOUNTER0_SELECT1 = 0xD941; -constexpr unsigned int mmPA_SC_PERFCOUNTER1_HI = 0xD143; -constexpr unsigned int mmPA_SC_PERFCOUNTER1_LO = 0xD142; -constexpr unsigned int mmPA_SC_PERFCOUNTER1_SELECT = 0xD942; -constexpr unsigned int mmPA_SC_PERFCOUNTER2_HI 
= 0xD145; -constexpr unsigned int mmPA_SC_PERFCOUNTER2_LO = 0xD144; -constexpr unsigned int mmPA_SC_PERFCOUNTER2_SELECT = 0xD943; -constexpr unsigned int mmPA_SC_PERFCOUNTER3_HI = 0xD147; -constexpr unsigned int mmPA_SC_PERFCOUNTER3_LO = 0xD146; -constexpr unsigned int mmPA_SC_PERFCOUNTER3_SELECT = 0xD944; -constexpr unsigned int mmPA_SC_PERFCOUNTER4_HI = 0xD149; -constexpr unsigned int mmPA_SC_PERFCOUNTER4_LO = 0xD148; -constexpr unsigned int mmPA_SC_PERFCOUNTER4_SELECT = 0xD945; -constexpr unsigned int mmPA_SC_PERFCOUNTER5_HI = 0xD14B; -constexpr unsigned int mmPA_SC_PERFCOUNTER5_LO = 0xD14A; -constexpr unsigned int mmPA_SC_PERFCOUNTER5_SELECT = 0xD946; -constexpr unsigned int mmPA_SC_PERFCOUNTER6_HI = 0xD14D; -constexpr unsigned int mmPA_SC_PERFCOUNTER6_LO = 0xD14C; -constexpr unsigned int mmPA_SC_PERFCOUNTER6_SELECT = 0xD947; -constexpr unsigned int mmPA_SC_PERFCOUNTER7_HI = 0xD14F; -constexpr unsigned int mmPA_SC_PERFCOUNTER7_LO = 0xD14E; -constexpr unsigned int mmPA_SC_PERFCOUNTER7_SELECT = 0xD948; -constexpr unsigned int mmPA_SC_RASTER_CONFIG = 0xA0D4; -constexpr unsigned int mmPA_SC_RASTER_CONFIG_1 = 0xA0D5; -constexpr unsigned int mmPA_SC_SCREEN_SCISSOR_BR = 0xA00D; -constexpr unsigned int mmPA_SC_SCREEN_SCISSOR_TL = 0xA00C; -constexpr unsigned int mmPA_SC_SHADER_CONTROL = 0xA310; -constexpr unsigned int mmPA_SC_TILE_STEERING_OVERRIDE = 0xA0D7; -constexpr unsigned int mmPA_SC_TRAP_SCREEN_COUNT = 0xC2B4; -constexpr unsigned int mmPA_SC_TRAP_SCREEN_H = 0xC2B1; -constexpr unsigned int mmPA_SC_TRAP_SCREEN_HV_EN = 0xC2B0; -constexpr unsigned int mmPA_SC_TRAP_SCREEN_OCCURRENCE = 0xC2B3; -constexpr unsigned int mmPA_SC_TRAP_SCREEN_V = 0xC2B2; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_0_BR = 0xA095; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_0_TL = 0xA094; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_1_BR = 0xA097; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_1_TL = 0xA096; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_2_BR = 0xA099; -constexpr unsigned 
int mmPA_SC_VPORT_SCISSOR_2_TL = 0xA098; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_3_BR = 0xA09B; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_3_TL = 0xA09A; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_4_BR = 0xA09D; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_4_TL = 0xA09C; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_5_BR = 0xA09F; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_5_TL = 0xA09E; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_6_BR = 0xA0A1; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_6_TL = 0xA0A0; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_7_BR = 0xA0A3; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_7_TL = 0xA0A2; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_8_BR = 0xA0A5; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_8_TL = 0xA0A4; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_9_BR = 0xA0A7; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_9_TL = 0xA0A6; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_10_BR = 0xA0A9; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_10_TL = 0xA0A8; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_11_BR = 0xA0AB; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_11_TL = 0xA0AA; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_12_BR = 0xA0AD; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_12_TL = 0xA0AC; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_13_BR = 0xA0AF; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_13_TL = 0xA0AE; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_14_BR = 0xA0B1; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_14_TL = 0xA0B0; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_15_BR = 0xA0B3; -constexpr unsigned int mmPA_SC_VPORT_SCISSOR_15_TL = 0xA0B2; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_0 = 0xA0B5; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_1 = 0xA0B7; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_2 = 0xA0B9; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_3 = 0xA0BB; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_4 = 0xA0BD; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_5 = 0xA0BF; -constexpr unsigned int 
mmPA_SC_VPORT_ZMAX_6 = 0xA0C1; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_7 = 0xA0C3; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_8 = 0xA0C5; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_9 = 0xA0C7; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_10 = 0xA0C9; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_11 = 0xA0CB; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_12 = 0xA0CD; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_13 = 0xA0CF; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_14 = 0xA0D1; -constexpr unsigned int mmPA_SC_VPORT_ZMAX_15 = 0xA0D3; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_0 = 0xA0B4; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_1 = 0xA0B6; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_2 = 0xA0B8; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_3 = 0xA0BA; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_4 = 0xA0BC; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_5 = 0xA0BE; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_6 = 0xA0C0; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_7 = 0xA0C2; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_8 = 0xA0C4; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_9 = 0xA0C6; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_10 = 0xA0C8; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_11 = 0xA0CA; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_12 = 0xA0CC; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_13 = 0xA0CE; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_14 = 0xA0D0; -constexpr unsigned int mmPA_SC_VPORT_ZMIN_15 = 0xA0D2; -constexpr unsigned int mmPA_SC_WINDOW_OFFSET = 0xA080; -constexpr unsigned int mmPA_SC_WINDOW_SCISSOR_BR = 0xA082; -constexpr unsigned int mmPA_SC_WINDOW_SCISSOR_TL = 0xA081; -constexpr unsigned int mmPA_SU_CNTL_STATUS = 0x2294; -constexpr unsigned int mmPA_SU_HARDWARE_SCREEN_OFFSET = 0xA08D; -constexpr unsigned int mmPA_SU_LINE_CNTL = 0xA282; -constexpr unsigned int mmPA_SU_LINE_STIPPLE_CNTL = 0xA209; -constexpr unsigned int mmPA_SU_LINE_STIPPLE_SCALE = 0xA20A; -constexpr unsigned int mmPA_SU_LINE_STIPPLE_VALUE = 0xC280; -constexpr unsigned int 
mmPA_SU_OVER_RASTERIZATION_CNTL = 0xA20F; -constexpr unsigned int mmPA_SU_PERFCOUNTER0_HI = 0xD101; -constexpr unsigned int mmPA_SU_PERFCOUNTER0_LO = 0xD100; -constexpr unsigned int mmPA_SU_PERFCOUNTER0_SELECT = 0xD900; -constexpr unsigned int mmPA_SU_PERFCOUNTER0_SELECT1 = 0xD901; -constexpr unsigned int mmPA_SU_PERFCOUNTER1_HI = 0xD103; -constexpr unsigned int mmPA_SU_PERFCOUNTER1_LO = 0xD102; -constexpr unsigned int mmPA_SU_PERFCOUNTER1_SELECT = 0xD902; -constexpr unsigned int mmPA_SU_PERFCOUNTER1_SELECT1 = 0xD903; -constexpr unsigned int mmPA_SU_PERFCOUNTER2_HI = 0xD105; -constexpr unsigned int mmPA_SU_PERFCOUNTER2_LO = 0xD104; -constexpr unsigned int mmPA_SU_PERFCOUNTER2_SELECT = 0xD904; -constexpr unsigned int mmPA_SU_PERFCOUNTER3_HI = 0xD107; -constexpr unsigned int mmPA_SU_PERFCOUNTER3_LO = 0xD106; -constexpr unsigned int mmPA_SU_POINT_MINMAX = 0xA281; -constexpr unsigned int mmPA_SU_POINT_SIZE = 0xA280; -constexpr unsigned int mmPA_SU_POLY_OFFSET_BACK_OFFSET = 0xA2E3; -constexpr unsigned int mmPA_SU_POLY_OFFSET_BACK_SCALE = 0xA2E2; -constexpr unsigned int mmPA_SU_POLY_OFFSET_CLAMP = 0xA2DF; -constexpr unsigned int mmPA_SU_POLY_OFFSET_DB_FMT_CNTL = 0xA2DE; -constexpr unsigned int mmPA_SU_POLY_OFFSET_FRONT_OFFSET = 0xA2E1; -constexpr unsigned int mmPA_SU_POLY_OFFSET_FRONT_SCALE = 0xA2E0; -constexpr unsigned int mmPA_SU_PRIM_FILTER_CNTL = 0xA20B; -constexpr unsigned int mmPA_SU_SC_MODE_CNTL = 0xA205; -constexpr unsigned int mmPA_SU_SMALL_PRIM_FILTER_CNTL = 0xA20C; -constexpr unsigned int mmPA_SU_VTX_CNTL = 0xA2F9; -constexpr unsigned int mmRLC_CGTT_MGCG_OVERRIDE = 0xEC48; -constexpr unsigned int mmRLC_PERFCOUNTER0_HI = 0xD481; -constexpr unsigned int mmRLC_PERFCOUNTER0_LO = 0xD480; -constexpr unsigned int mmRLC_PERFCOUNTER0_SELECT = 0xDCC1; -constexpr unsigned int mmRLC_PERFCOUNTER1_HI = 0xD483; -constexpr unsigned int mmRLC_PERFCOUNTER1_LO = 0xD482; -constexpr unsigned int mmRLC_PERFCOUNTER1_SELECT = 0xDCC2; -constexpr unsigned int mmRLC_PERFMON_CNTL = 
0xDCC0; -constexpr unsigned int mmRLC_SPM_PERFMON_CNTL = 0xDC80; -constexpr unsigned int mmRLC_SPM_PERFMON_RING_BASE_HI = 0xDC82; -constexpr unsigned int mmRLC_SPM_PERFMON_RING_BASE_LO = 0xDC81; -constexpr unsigned int mmRLC_SPM_PERFMON_RING_SIZE = 0xDC83; -constexpr unsigned int mmRLC_SPM_UTCL1_CNTL = 0xECB5; -constexpr unsigned int mmRLC_SPM_UTCL1_ERROR_1 = 0xECBC; -constexpr unsigned int mmRLC_SPM_UTCL1_ERROR_2 = 0xECBD; -constexpr unsigned int mmRMI_PERFCOUNTER0_HI = 0xD4C1; -constexpr unsigned int mmRMI_PERFCOUNTER0_LO = 0xD4C0; -constexpr unsigned int mmRMI_PERFCOUNTER0_SELECT = 0xDD00; -constexpr unsigned int mmRMI_PERFCOUNTER0_SELECT1 = 0xDD01; -constexpr unsigned int mmRMI_PERFCOUNTER1_HI = 0xD4C3; -constexpr unsigned int mmRMI_PERFCOUNTER1_LO = 0xD4C2; -constexpr unsigned int mmRMI_PERFCOUNTER1_SELECT = 0xDD02; -constexpr unsigned int mmRMI_PERFCOUNTER2_HI = 0xD4C5; -constexpr unsigned int mmRMI_PERFCOUNTER2_LO = 0xD4C4; -constexpr unsigned int mmRMI_PERFCOUNTER2_SELECT = 0xDD03; -constexpr unsigned int mmRMI_PERFCOUNTER2_SELECT1 = 0xDD04; -constexpr unsigned int mmRMI_PERFCOUNTER3_HI = 0xD4C7; -constexpr unsigned int mmRMI_PERFCOUNTER3_LO = 0xD4C6; -constexpr unsigned int mmRMI_PERFCOUNTER3_SELECT = 0xDD05; -constexpr unsigned int mmRMI_PERF_COUNTER_CNTL = 0xDD06; -constexpr unsigned int mmSPI_ARB_CYCLES_0 = 0x31C1; -constexpr unsigned int mmSPI_ARB_CYCLES_1 = 0x31C2; -constexpr unsigned int mmSPI_ARB_PRIORITY = 0x31C0; -constexpr unsigned int mmSPI_BARYC_CNTL = 0xA1B8; -constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_0 = 0x24DC; -constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_1 = 0x24DD; -constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_2 = 0x24DE; -constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_3 = 0x24DF; -constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_STATUS = 0x24DB; -constexpr unsigned int mmSPI_DSM_CNTL = 0x2443; -constexpr unsigned int mmSPI_DSM_CNTL2 = 0x2444; -constexpr unsigned int mmSPI_EDC_CNT = 0x2445; -constexpr unsigned int 
mmSPI_GDS_CREDITS = 0x24D8; -constexpr unsigned int mmSPI_GFX_CNTL = 0x243C; -constexpr unsigned int mmSPI_INTERP_CONTROL_0 = 0xA1B5; -constexpr unsigned int mmSPI_LB_CTR_CTRL = 0x24D4; -constexpr unsigned int mmSPI_LB_DATA_REG = 0x24D6; -constexpr unsigned int mmSPI_LB_DATA_WAVES = 0x24E4; -constexpr unsigned int mmSPI_P0_TRAP_SCREEN_GPR_MIN = 0x24F0; -constexpr unsigned int mmSPI_P0_TRAP_SCREEN_PSBA_HI = 0x24ED; -constexpr unsigned int mmSPI_P0_TRAP_SCREEN_PSBA_LO = 0x24EC; -constexpr unsigned int mmSPI_P0_TRAP_SCREEN_PSMA_HI = 0x24EF; -constexpr unsigned int mmSPI_P0_TRAP_SCREEN_PSMA_LO = 0x24EE; -constexpr unsigned int mmSPI_P1_TRAP_SCREEN_GPR_MIN = 0x24F5; -constexpr unsigned int mmSPI_P1_TRAP_SCREEN_PSBA_HI = 0x24F2; -constexpr unsigned int mmSPI_P1_TRAP_SCREEN_PSBA_LO = 0x24F1; -constexpr unsigned int mmSPI_P1_TRAP_SCREEN_PSMA_HI = 0x24F4; -constexpr unsigned int mmSPI_P1_TRAP_SCREEN_PSMA_LO = 0x24F3; -constexpr unsigned int mmSPI_PERFCOUNTER0_HI = 0xD180; -constexpr unsigned int mmSPI_PERFCOUNTER0_LO = 0xD181; -constexpr unsigned int mmSPI_PERFCOUNTER0_SELECT = 0xD980; -constexpr unsigned int mmSPI_PERFCOUNTER0_SELECT1 = 0xD984; -constexpr unsigned int mmSPI_PERFCOUNTER1_HI = 0xD182; -constexpr unsigned int mmSPI_PERFCOUNTER1_LO = 0xD183; -constexpr unsigned int mmSPI_PERFCOUNTER1_SELECT = 0xD981; -constexpr unsigned int mmSPI_PERFCOUNTER1_SELECT1 = 0xD985; -constexpr unsigned int mmSPI_PERFCOUNTER2_HI = 0xD184; -constexpr unsigned int mmSPI_PERFCOUNTER2_LO = 0xD185; -constexpr unsigned int mmSPI_PERFCOUNTER2_SELECT = 0xD982; -constexpr unsigned int mmSPI_PERFCOUNTER2_SELECT1 = 0xD986; -constexpr unsigned int mmSPI_PERFCOUNTER3_HI = 0xD186; -constexpr unsigned int mmSPI_PERFCOUNTER3_LO = 0xD187; -constexpr unsigned int mmSPI_PERFCOUNTER3_SELECT = 0xD983; -constexpr unsigned int mmSPI_PERFCOUNTER3_SELECT1 = 0xD987; -constexpr unsigned int mmSPI_PERFCOUNTER4_HI = 0xD188; -constexpr unsigned int mmSPI_PERFCOUNTER4_LO = 0xD189; -constexpr unsigned int 
mmSPI_PERFCOUNTER4_SELECT = 0xD988; -constexpr unsigned int mmSPI_PERFCOUNTER5_HI = 0xD18A; -constexpr unsigned int mmSPI_PERFCOUNTER5_LO = 0xD18B; -constexpr unsigned int mmSPI_PERFCOUNTER5_SELECT = 0xD989; -constexpr unsigned int mmSPI_PERFCOUNTER_BINS = 0xD98A; -constexpr unsigned int mmSPI_PS_INPUT_ADDR = 0xA1B4; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_0 = 0xA191; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_1 = 0xA192; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_2 = 0xA193; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_3 = 0xA194; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_4 = 0xA195; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_5 = 0xA196; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_6 = 0xA197; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_7 = 0xA198; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_8 = 0xA199; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_9 = 0xA19A; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_10 = 0xA19B; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_11 = 0xA19C; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_12 = 0xA19D; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_13 = 0xA19E; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_14 = 0xA19F; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_15 = 0xA1A0; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_16 = 0xA1A1; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_17 = 0xA1A2; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_18 = 0xA1A3; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_19 = 0xA1A4; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_20 = 0xA1A5; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_21 = 0xA1A6; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_22 = 0xA1A7; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_23 = 0xA1A8; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_24 = 0xA1A9; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_25 = 0xA1AA; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_26 = 0xA1AB; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_27 = 0xA1AC; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_28 = 0xA1AD; -constexpr unsigned int 
mmSPI_PS_INPUT_CNTL_29 = 0xA1AE; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_30 = 0xA1AF; -constexpr unsigned int mmSPI_PS_INPUT_CNTL_31 = 0xA1B0; -constexpr unsigned int mmSPI_PS_INPUT_ENA = 0xA1B3; -constexpr unsigned int mmSPI_PS_IN_CONTROL = 0xA1B6; -constexpr unsigned int mmSPI_PS_MAX_WAVE_ID = 0x243A; -constexpr unsigned int mmSPI_SHADER_COL_FORMAT = 0xA1C5; -constexpr unsigned int mmSPI_SHADER_PGM_HI_GS = 0x2C89; -constexpr unsigned int mmSPI_SHADER_PGM_HI_HS = 0x2D09; -constexpr unsigned int mmSPI_SHADER_PGM_HI_PS = 0x2C09; -constexpr unsigned int mmSPI_SHADER_PGM_LO_GS = 0x2C88; -constexpr unsigned int mmSPI_SHADER_PGM_LO_HS = 0x2D08; -constexpr unsigned int mmSPI_SHADER_PGM_LO_PS = 0x2C08; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_GS = 0x2C8A; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_HS = 0x2D0A; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_PS = 0x2C0A; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_GS = 0x2C8B; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_HS = 0x2D0B; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_PS = 0x2C0B; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_GS = 0x2C87; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_HS = 0x2D07; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_PS = 0x2C07; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC4_GS = 0x2C81; -constexpr unsigned int mmSPI_SHADER_PGM_RSRC4_HS = 0x2D01; -constexpr unsigned int mmSPI_SHADER_POS_FORMAT = 0xA1C3; -constexpr unsigned int mmSPI_SHADER_USER_DATA_ADDR_HI_GS = 0x2C83; -constexpr unsigned int mmSPI_SHADER_USER_DATA_ADDR_HI_HS = 0x2D03; -constexpr unsigned int mmSPI_SHADER_USER_DATA_ADDR_LO_GS = 0x2C82; -constexpr unsigned int mmSPI_SHADER_USER_DATA_ADDR_LO_HS = 0x2D02; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_0 = 0x2C0C; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_1 = 0x2C0D; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_2 = 0x2C0E; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_3 = 0x2C0F; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_4 
= 0x2C10; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_5 = 0x2C11; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_6 = 0x2C12; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_7 = 0x2C13; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_8 = 0x2C14; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_9 = 0x2C15; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_10 = 0x2C16; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_11 = 0x2C17; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_12 = 0x2C18; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_13 = 0x2C19; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_14 = 0x2C1A; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_15 = 0x2C1B; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_16 = 0x2C1C; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_17 = 0x2C1D; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_18 = 0x2C1E; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_19 = 0x2C1F; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_20 = 0x2C20; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_21 = 0x2C21; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_22 = 0x2C22; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_23 = 0x2C23; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_24 = 0x2C24; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_25 = 0x2C25; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_26 = 0x2C26; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_27 = 0x2C27; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_28 = 0x2C28; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_29 = 0x2C29; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_30 = 0x2C2A; -constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_31 = 0x2C2B; -constexpr unsigned int mmSPI_SHADER_Z_FORMAT = 0xA1C4; -constexpr unsigned int mmSPI_SX_EXPORT_BUFFER_SIZES = 0x24D9; -constexpr unsigned int mmSPI_SX_SCOREBOARD_BUFFER_SIZES = 0x24DA; -constexpr unsigned int mmSPI_TMPRING_SIZE = 0xA1BA; -constexpr unsigned int mmSPI_VS_OUT_CONFIG = 0xA1B1; 
-constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS0 = 0x31C9; -constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS1 = 0x31CA; -constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS2 = 0x31CB; -constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS3 = 0x31CC; -constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_GFX = 0x31C7; -constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_HP3D = 0x31C8; -constexpr unsigned int mmSPI_WF_LIFETIME_CNTL = 0x24AA; -constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_0 = 0x24AB; -constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_2 = 0x24AD; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_0 = 0x24B5; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_2 = 0x24B7; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_4 = 0x24B9; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_6 = 0x24BB; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_7 = 0x24BC; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_9 = 0x24BE; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_11 = 0x24C0; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_13 = 0x24C2; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_14 = 0x24C3; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_15 = 0x24C4; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_16 = 0x24C5; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_17 = 0x24C6; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_18 = 0x24C7; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_19 = 0x24C8; -constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_20 = 0x24C9; -constexpr unsigned int mmSQ_ALU_CLK_CTRL = 0xF08E; -constexpr unsigned int mmSQ_CMD = 0x237B; -constexpr unsigned int mmSQ_CONFIG = 0x2300; -constexpr unsigned int mmSQ_DSM_CNTL = 0x2306; -constexpr unsigned int mmSQ_DSM_CNTL2 = 0x2307; -constexpr unsigned int mmSQ_FIFO_SIZES = 0x2305; -constexpr unsigned int mmSQ_IND_DATA = 0x2379; -constexpr unsigned int mmSQ_IND_INDEX = 0x2378; -constexpr unsigned int mmSQ_LDS_CLK_CTRL = 0xF090; -constexpr unsigned int mmSQ_PERFCOUNTER0_LO = 0xD1C0; -constexpr unsigned int 
mmSQ_PERFCOUNTER0_SELECT = 0xD9C0; -constexpr unsigned int mmSQ_PERFCOUNTER1_LO = 0xD1C2; -constexpr unsigned int mmSQ_PERFCOUNTER1_SELECT = 0xD9C1; -constexpr unsigned int mmSQ_PERFCOUNTER2_LO = 0xD1C4; -constexpr unsigned int mmSQ_PERFCOUNTER2_SELECT = 0xD9C2; -constexpr unsigned int mmSQ_PERFCOUNTER3_LO = 0xD1C6; -constexpr unsigned int mmSQ_PERFCOUNTER3_SELECT = 0xD9C3; -constexpr unsigned int mmSQ_PERFCOUNTER4_LO = 0xD1C8; -constexpr unsigned int mmSQ_PERFCOUNTER4_SELECT = 0xD9C4; -constexpr unsigned int mmSQ_PERFCOUNTER5_LO = 0xD1CA; -constexpr unsigned int mmSQ_PERFCOUNTER5_SELECT = 0xD9C5; -constexpr unsigned int mmSQ_PERFCOUNTER6_LO = 0xD1CC; -constexpr unsigned int mmSQ_PERFCOUNTER6_SELECT = 0xD9C6; -constexpr unsigned int mmSQ_PERFCOUNTER7_LO = 0xD1CE; -constexpr unsigned int mmSQ_PERFCOUNTER7_SELECT = 0xD9C7; -constexpr unsigned int mmSQ_PERFCOUNTER8_SELECT = 0xD9C8; -constexpr unsigned int mmSQ_PERFCOUNTER9_SELECT = 0xD9C9; -constexpr unsigned int mmSQ_PERFCOUNTER10_SELECT = 0xD9CA; -constexpr unsigned int mmSQ_PERFCOUNTER11_SELECT = 0xD9CB; -constexpr unsigned int mmSQ_PERFCOUNTER12_SELECT = 0xD9CC; -constexpr unsigned int mmSQ_PERFCOUNTER13_SELECT = 0xD9CD; -constexpr unsigned int mmSQ_PERFCOUNTER14_SELECT = 0xD9CE; -constexpr unsigned int mmSQ_PERFCOUNTER15_SELECT = 0xD9CF; -constexpr unsigned int mmSQ_PERFCOUNTER_CTRL = 0xD9E0; -constexpr unsigned int mmSQ_PERFCOUNTER_CTRL2 = 0xD9E2; -constexpr unsigned int mmSQ_RANDOM_WAVE_PRI = 0x2303; -constexpr unsigned int mmSQ_TEX_CLK_CTRL = 0xF08F; -constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_0 = 0xC340; -constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_1 = 0xC341; -constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_2 = 0xC342; -constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_3 = 0xC343; -constexpr unsigned int mmSX_BLEND_OPT_CONTROL = 0xA1D7; -constexpr unsigned int mmSX_BLEND_OPT_EPSILON = 0xA1D6; -constexpr unsigned int mmSX_MRT0_BLEND_OPT = 0xA1D8; -constexpr unsigned int mmSX_MRT1_BLEND_OPT = 
0xA1D9; -constexpr unsigned int mmSX_MRT2_BLEND_OPT = 0xA1DA; -constexpr unsigned int mmSX_MRT3_BLEND_OPT = 0xA1DB; -constexpr unsigned int mmSX_MRT4_BLEND_OPT = 0xA1DC; -constexpr unsigned int mmSX_MRT5_BLEND_OPT = 0xA1DD; -constexpr unsigned int mmSX_MRT6_BLEND_OPT = 0xA1DE; -constexpr unsigned int mmSX_MRT7_BLEND_OPT = 0xA1DF; -constexpr unsigned int mmSX_PERFCOUNTER0_HI = 0xD241; -constexpr unsigned int mmSX_PERFCOUNTER0_LO = 0xD240; -constexpr unsigned int mmSX_PERFCOUNTER0_SELECT = 0xDA40; -constexpr unsigned int mmSX_PERFCOUNTER0_SELECT1 = 0xDA44; -constexpr unsigned int mmSX_PERFCOUNTER1_HI = 0xD243; -constexpr unsigned int mmSX_PERFCOUNTER1_LO = 0xD242; -constexpr unsigned int mmSX_PERFCOUNTER1_SELECT = 0xDA41; -constexpr unsigned int mmSX_PERFCOUNTER1_SELECT1 = 0xDA45; -constexpr unsigned int mmSX_PERFCOUNTER2_HI = 0xD245; -constexpr unsigned int mmSX_PERFCOUNTER2_LO = 0xD244; -constexpr unsigned int mmSX_PERFCOUNTER2_SELECT = 0xDA42; -constexpr unsigned int mmSX_PERFCOUNTER3_HI = 0xD247; -constexpr unsigned int mmSX_PERFCOUNTER3_LO = 0xD246; -constexpr unsigned int mmSX_PERFCOUNTER3_SELECT = 0xDA43; -constexpr unsigned int mmSX_PS_DOWNCONVERT = 0xA1D5; -constexpr unsigned int mmTA_BC_BASE_ADDR = 0xA020; -constexpr unsigned int mmTA_BC_BASE_ADDR_HI = 0xA021; -constexpr unsigned int mmTA_CGTT_CTRL = 0xF09D; -constexpr unsigned int mmTA_CNTL = 0x2541; -constexpr unsigned int mmTA_CNTL_AUX = 0x2542; -constexpr unsigned int mmTA_CS_BC_BASE_ADDR = 0xC380; -constexpr unsigned int mmTA_CS_BC_BASE_ADDR_HI = 0xC381; -constexpr unsigned int mmTA_PERFCOUNTER0_HI = 0xD2C1; -constexpr unsigned int mmTA_PERFCOUNTER0_LO = 0xD2C0; -constexpr unsigned int mmTA_PERFCOUNTER0_SELECT = 0xDAC0; -constexpr unsigned int mmTA_PERFCOUNTER0_SELECT1 = 0xDAC1; -constexpr unsigned int mmTA_PERFCOUNTER1_HI = 0xD2C3; -constexpr unsigned int mmTA_PERFCOUNTER1_LO = 0xD2C2; -constexpr unsigned int mmTA_PERFCOUNTER1_SELECT = 0xDAC2; -constexpr unsigned int mmTA_SCRATCH = 0x2564; -constexpr 
unsigned int mmTA_STATUS = 0x2548; -constexpr unsigned int mmTCP_PERFCOUNTER0_HI = 0xD341; -constexpr unsigned int mmTCP_PERFCOUNTER0_LO = 0xD340; -constexpr unsigned int mmTCP_PERFCOUNTER0_SELECT = 0xDB40; -constexpr unsigned int mmTCP_PERFCOUNTER0_SELECT1 = 0xDB41; -constexpr unsigned int mmTCP_PERFCOUNTER1_HI = 0xD343; -constexpr unsigned int mmTCP_PERFCOUNTER1_LO = 0xD342; -constexpr unsigned int mmTCP_PERFCOUNTER1_SELECT = 0xDB42; -constexpr unsigned int mmTCP_PERFCOUNTER1_SELECT1 = 0xDB43; -constexpr unsigned int mmTCP_PERFCOUNTER2_HI = 0xD345; -constexpr unsigned int mmTCP_PERFCOUNTER2_LO = 0xD344; -constexpr unsigned int mmTCP_PERFCOUNTER2_SELECT = 0xDB44; -constexpr unsigned int mmTCP_PERFCOUNTER3_HI = 0xD347; -constexpr unsigned int mmTCP_PERFCOUNTER3_LO = 0xD346; -constexpr unsigned int mmTCP_PERFCOUNTER3_SELECT = 0xDB45; -constexpr unsigned int mmTD_PERFCOUNTER0_HI = 0xD301; -constexpr unsigned int mmTD_PERFCOUNTER0_LO = 0xD300; -constexpr unsigned int mmTD_PERFCOUNTER0_SELECT = 0xDB00; -constexpr unsigned int mmTD_PERFCOUNTER0_SELECT1 = 0xDB01; -constexpr unsigned int mmTD_PERFCOUNTER1_HI = 0xD303; -constexpr unsigned int mmTD_PERFCOUNTER1_LO = 0xD302; -constexpr unsigned int mmTD_PERFCOUNTER1_SELECT = 0xDB02; -constexpr unsigned int mmVGT_DMA_BASE = 0xA1FA; -constexpr unsigned int mmVGT_DMA_BASE_HI = 0xA1F9; -constexpr unsigned int mmVGT_DMA_DATA_FIFO_DEPTH = 0x222D; -constexpr unsigned int mmVGT_DMA_INDEX_TYPE = 0xA29F; -constexpr unsigned int mmVGT_DMA_MAX_SIZE = 0xA29E; -constexpr unsigned int mmVGT_DMA_NUM_INSTANCES = 0xA2A2; -constexpr unsigned int mmVGT_DMA_REQ_FIFO_DEPTH = 0x222E; -constexpr unsigned int mmVGT_DMA_SIZE = 0xA29D; -constexpr unsigned int mmVGT_DRAW_INITIATOR = 0xA1FC; -constexpr unsigned int mmVGT_DRAW_INIT_FIFO_DEPTH = 0x222F; -constexpr unsigned int mmVGT_DRAW_PAYLOAD_CNTL = 0xA2A6; -constexpr unsigned int mmVGT_ENHANCE = 0xA294; -constexpr unsigned int mmVGT_ESGS_RING_ITEMSIZE = 0xA2AB; -constexpr unsigned int 
mmVGT_EVENT_ADDRESS_REG = 0xA1FE; -constexpr unsigned int mmVGT_EVENT_INITIATOR = 0xA2A4; -constexpr unsigned int mmVGT_GS_INSTANCE_CNT = 0xA2E4; -constexpr unsigned int mmVGT_GS_MAX_VERT_OUT = 0xA2CE; -constexpr unsigned int mmVGT_GS_MAX_WAVE_ID = 0x2269; -constexpr unsigned int mmVGT_HOS_MAX_TESS_LEVEL = 0xA286; -constexpr unsigned int mmVGT_HOS_MIN_TESS_LEVEL = 0xA287; -constexpr unsigned int mmVGT_INDEX_TYPE = 0xC243; -constexpr unsigned int mmVGT_INSTANCE_BASE_ID = 0xC25A; -constexpr unsigned int mmVGT_LS_HS_CONFIG = 0xA2D6; -constexpr unsigned int mmVGT_MC_LAT_CNTL = 0x2236; -constexpr unsigned int mmVGT_MULTI_PRIM_IB_RESET_INDX = 0xA103; -constexpr unsigned int mmVGT_NUM_INDICES = 0xC24C; -constexpr unsigned int mmVGT_NUM_INSTANCES = 0xC24D; -constexpr unsigned int mmVGT_PRIMITIVEID_EN = 0xA2A1; -constexpr unsigned int mmVGT_PRIMITIVEID_RESET = 0xA2A3; -constexpr unsigned int mmVGT_PRIMITIVE_TYPE = 0xC242; -constexpr unsigned int mmVGT_REUSE_OFF = 0xA2AD; -constexpr unsigned int mmVGT_SHADER_STAGES_EN = 0xA2D5; -constexpr unsigned int mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0xA2CB; -constexpr unsigned int mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0xA2CA; -constexpr unsigned int mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0xA2CC; -constexpr unsigned int mmVGT_SYS_CONFIG = 0x2263; -constexpr unsigned int mmVGT_TESS_DISTRIBUTION = 0xA2D4; -constexpr unsigned int mmVGT_TF_PARAM = 0xA2DB; -constexpr unsigned int mmWD_CNTL_STATUS = 0x223F; -constexpr unsigned int mmWD_ENHANCE = 0xA2A0; -constexpr unsigned int mmWD_QOS = 0x2242; -constexpr unsigned int mmWD_UTCL1_CNTL = 0x2243; -constexpr unsigned int mmWD_UTCL1_STATUS = 0x2244; - -namespace Apu09_1xPlus -{ - constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_GS = 0x2C80; - constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_HS = 0x2D00; - constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_PS = 0x2C06; -} // namespace Apu09_1xPlus - -#if CHIP_HDR_PHOENIX1 -namespace Apu11 -{ - constexpr unsigned int mmATC_PERFCOUNTER0_CFG = 
0x0C0D; - constexpr unsigned int mmATC_PERFCOUNTER1_CFG = 0x0C0E; - constexpr unsigned int mmATC_PERFCOUNTER2_CFG = 0x0C0F; - constexpr unsigned int mmATC_PERFCOUNTER3_CFG = 0x0C10; - constexpr unsigned int mmATC_PERFCOUNTER_HI = 0x0C0C; - constexpr unsigned int mmATC_PERFCOUNTER_LO = 0x0C0B; - constexpr unsigned int mmATC_PERFCOUNTER_RSLT_CNTL = 0x0C11; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER0_CFG = 0xDD44; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER1_CFG = 0xDD45; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_HI = 0xD4F5; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_LO = 0xD4F4; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_MODE = 0xDD42; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT = 0xDD40; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT1 = 0xDD41; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_HI = 0xD4F9; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_LO = 0xD4F8; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD46; - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0CE8; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0CE9; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0CEA; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0CEB; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0CEE; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0CEF; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0CEC; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0CED; -} // namespace Apu11 -#endif - -namespace Core -{ - constexpr unsigned int mmDB_RING_CONTROL = 0x261B; - constexpr unsigned int mmGDS_PERFCOUNTER0_HI = 0xD281; - constexpr unsigned int mmGDS_PERFCOUNTER0_LO = 0xD280; - constexpr unsigned int mmGDS_PERFCOUNTER0_SELECT = 0xDA80; - constexpr unsigned int mmGDS_PERFCOUNTER0_SELECT1 = 0xDA84; - constexpr unsigned int mmGDS_PERFCOUNTER1_HI = 0xD283; - constexpr unsigned int mmGDS_PERFCOUNTER1_LO = 0xD282; - constexpr unsigned int mmGDS_PERFCOUNTER1_SELECT = 0xDA81; - constexpr 
unsigned int mmGDS_PERFCOUNTER2_HI = 0xD285; - constexpr unsigned int mmGDS_PERFCOUNTER2_LO = 0xD284; - constexpr unsigned int mmGDS_PERFCOUNTER2_SELECT = 0xDA82; - constexpr unsigned int mmGDS_PERFCOUNTER3_HI = 0xD287; - constexpr unsigned int mmGDS_PERFCOUNTER3_LO = 0xD286; - constexpr unsigned int mmGDS_PERFCOUNTER3_SELECT = 0xDA83; - constexpr unsigned int mmGRBM_SE1_PERFCOUNTER_HI = 0xD048; - constexpr unsigned int mmGRBM_SE1_PERFCOUNTER_LO = 0xD047; - constexpr unsigned int mmGRBM_SE1_PERFCOUNTER_SELECT = 0xD843; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_1 = 0x24AC; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_3 = 0x24AE; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_4 = 0x24AF; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_5 = 0x24B0; -} // namespace Core - -namespace Gfx09 -{ - constexpr unsigned int mmATC_L2_PERFCOUNTER0_CFG = 0xDD40; - constexpr unsigned int mmATC_L2_PERFCOUNTER1_CFG = 0xDD41; - constexpr unsigned int mmATC_L2_PERFCOUNTER_HI = 0xD501; - constexpr unsigned int mmATC_L2_PERFCOUNTER_LO = 0xD500; - constexpr unsigned int mmATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD42; - constexpr unsigned int mmATC_PERFCOUNTER0_CFG = 0x0C3E; - constexpr unsigned int mmATC_PERFCOUNTER1_CFG = 0x0C3F; - constexpr unsigned int mmATC_PERFCOUNTER2_CFG = 0x0C40; - constexpr unsigned int mmATC_PERFCOUNTER3_CFG = 0x0C41; - constexpr unsigned int mmATC_PERFCOUNTER_HI = 0x0C44; - constexpr unsigned int mmATC_PERFCOUNTER_LO = 0x0C43; - constexpr unsigned int mmATC_PERFCOUNTER_RSLT_CNTL = 0x0C42; - constexpr unsigned int mmCB_COLOR0_ATTRIB2 = 0xA31A; - constexpr unsigned int mmCB_COLOR0_BASE_EXT = 0xA319; - constexpr unsigned int mmCB_COLOR0_CMASK_BASE_EXT = 0xA320; - constexpr unsigned int mmCB_COLOR0_DCC_BASE_EXT = 0xA326; - constexpr unsigned int mmCB_COLOR0_FMASK_BASE_EXT = 0xA322; - constexpr unsigned int mmCB_COLOR1_ATTRIB2 = 0xA329; - constexpr unsigned int mmCB_COLOR1_BASE_EXT = 0xA328; - constexpr unsigned int mmCB_COLOR1_CMASK_BASE_EXT = 0xA32F; - 
constexpr unsigned int mmCB_COLOR1_DCC_BASE_EXT = 0xA335; - constexpr unsigned int mmCB_COLOR1_FMASK_BASE_EXT = 0xA331; - constexpr unsigned int mmCB_COLOR2_ATTRIB2 = 0xA338; - constexpr unsigned int mmCB_COLOR2_BASE_EXT = 0xA337; - constexpr unsigned int mmCB_COLOR2_CMASK_BASE_EXT = 0xA33E; - constexpr unsigned int mmCB_COLOR2_DCC_BASE_EXT = 0xA344; - constexpr unsigned int mmCB_COLOR2_FMASK_BASE_EXT = 0xA340; - constexpr unsigned int mmCB_COLOR3_ATTRIB2 = 0xA347; - constexpr unsigned int mmCB_COLOR3_BASE_EXT = 0xA346; - constexpr unsigned int mmCB_COLOR3_CMASK_BASE_EXT = 0xA34D; - constexpr unsigned int mmCB_COLOR3_DCC_BASE_EXT = 0xA353; - constexpr unsigned int mmCB_COLOR3_FMASK_BASE_EXT = 0xA34F; - constexpr unsigned int mmCB_COLOR4_ATTRIB2 = 0xA356; - constexpr unsigned int mmCB_COLOR4_BASE_EXT = 0xA355; - constexpr unsigned int mmCB_COLOR4_CMASK_BASE_EXT = 0xA35C; - constexpr unsigned int mmCB_COLOR4_DCC_BASE_EXT = 0xA362; - constexpr unsigned int mmCB_COLOR4_FMASK_BASE_EXT = 0xA35E; - constexpr unsigned int mmCB_COLOR5_ATTRIB2 = 0xA365; - constexpr unsigned int mmCB_COLOR5_BASE_EXT = 0xA364; - constexpr unsigned int mmCB_COLOR5_CMASK_BASE_EXT = 0xA36B; - constexpr unsigned int mmCB_COLOR5_DCC_BASE_EXT = 0xA371; - constexpr unsigned int mmCB_COLOR5_FMASK_BASE_EXT = 0xA36D; - constexpr unsigned int mmCB_COLOR6_ATTRIB2 = 0xA374; - constexpr unsigned int mmCB_COLOR6_BASE_EXT = 0xA373; - constexpr unsigned int mmCB_COLOR6_CMASK_BASE_EXT = 0xA37A; - constexpr unsigned int mmCB_COLOR6_DCC_BASE_EXT = 0xA380; - constexpr unsigned int mmCB_COLOR6_FMASK_BASE_EXT = 0xA37C; - constexpr unsigned int mmCB_COLOR7_ATTRIB2 = 0xA383; - constexpr unsigned int mmCB_COLOR7_BASE_EXT = 0xA382; - constexpr unsigned int mmCB_COLOR7_CMASK_BASE_EXT = 0xA389; - constexpr unsigned int mmCB_COLOR7_DCC_BASE_EXT = 0xA38F; - constexpr unsigned int mmCB_COLOR7_FMASK_BASE_EXT = 0xA38B; - constexpr unsigned int mmCB_DCC_CONFIG = 0x2688; - constexpr unsigned int mmCB_HW_CONTROL = 0x2680; - 
constexpr unsigned int mmCB_HW_CONTROL_1 = 0x2681; - constexpr unsigned int mmCB_HW_CONTROL_2 = 0x2682; - constexpr unsigned int mmCB_HW_MEM_ARBITER_RD = 0x2686; - constexpr unsigned int mmCB_HW_MEM_ARBITER_WR = 0x2687; - constexpr unsigned int mmCB_MRT0_EPITCH = 0xA1E8; - constexpr unsigned int mmCB_MRT1_EPITCH = 0xA1E9; - constexpr unsigned int mmCB_MRT2_EPITCH = 0xA1EA; - constexpr unsigned int mmCB_MRT3_EPITCH = 0xA1EB; - constexpr unsigned int mmCB_MRT4_EPITCH = 0xA1EC; - constexpr unsigned int mmCB_MRT5_EPITCH = 0xA1ED; - constexpr unsigned int mmCB_MRT6_EPITCH = 0xA1EE; - constexpr unsigned int mmCB_MRT7_EPITCH = 0xA1EF; - constexpr unsigned int mmCOMPUTE_RELAUNCH = 0x2E22; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_HI = 0x2E24; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_LO = 0x2E23; - constexpr unsigned int mmDB_DEPTH_SIZE = 0xA007; - constexpr unsigned int mmDB_DFSM_CONFIG = 0x2630; - constexpr unsigned int mmDB_DFSM_CONTROL = 0xA018; - constexpr unsigned int mmDB_DFSM_FLUSH_AUX_EVENT = 0x2636; - constexpr unsigned int mmDB_DFSM_FLUSH_ENABLE = 0x2635; - constexpr unsigned int mmDB_DFSM_PRIMS_IN_FLIGHT = 0x2633; - constexpr unsigned int mmDB_DFSM_TILES_IN_FLIGHT = 0x2632; - constexpr unsigned int mmDB_DFSM_WATCHDOG = 0x2634; - constexpr unsigned int mmDB_DFSM_WATERMARK = 0x2631; - constexpr unsigned int mmDB_EXCEPTION_CONTROL = 0x261A; - constexpr unsigned int mmDB_HTILE_DATA_BASE_HI = 0xA006; - constexpr unsigned int mmDB_RMI_CACHE_POLICY = 0x261E; - constexpr unsigned int mmDB_STENCIL_INFO = 0xA00F; - constexpr unsigned int mmDB_STENCIL_INFO2 = 0xA01B; - constexpr unsigned int mmDB_STENCIL_READ_BASE = 0xA012; - constexpr unsigned int mmDB_STENCIL_READ_BASE_HI = 0xA013; - constexpr unsigned int mmDB_STENCIL_WRITE_BASE = 0xA016; - constexpr unsigned int mmDB_STENCIL_WRITE_BASE_HI = 0xA017; - constexpr unsigned int mmDB_Z_INFO = 0xA00E; - constexpr unsigned int mmDB_Z_INFO2 = 0xA01A; - constexpr unsigned int mmDB_Z_READ_BASE = 0xA010; - 
constexpr unsigned int mmDB_Z_READ_BASE_HI = 0xA011; - constexpr unsigned int mmDB_Z_WRITE_BASE_HI = 0xA015; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_HI = 0xD04A; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_LO = 0xD049; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_SELECT = 0xD844; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_HI = 0xD04C; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_LO = 0xD04B; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_SELECT = 0xD845; - constexpr unsigned int mmIA_CNTL_STATUS = 0x2237; - constexpr unsigned int mmIA_MULTI_VGT_PARAM = 0xC258; - constexpr unsigned int mmIA_PERFCOUNTER0_HI = 0xD089; - constexpr unsigned int mmIA_PERFCOUNTER0_LO = 0xD088; - constexpr unsigned int mmIA_PERFCOUNTER0_SELECT = 0xD884; - constexpr unsigned int mmIA_PERFCOUNTER0_SELECT1 = 0xD888; - constexpr unsigned int mmIA_PERFCOUNTER1_HI = 0xD08B; - constexpr unsigned int mmIA_PERFCOUNTER1_LO = 0xD08A; - constexpr unsigned int mmIA_PERFCOUNTER1_SELECT = 0xD885; - constexpr unsigned int mmIA_PERFCOUNTER2_HI = 0xD08D; - constexpr unsigned int mmIA_PERFCOUNTER2_LO = 0xD08C; - constexpr unsigned int mmIA_PERFCOUNTER2_SELECT = 0xD886; - constexpr unsigned int mmIA_PERFCOUNTER3_HI = 0xD08F; - constexpr unsigned int mmIA_PERFCOUNTER3_LO = 0xD08E; - constexpr unsigned int mmIA_PERFCOUNTER3_SELECT = 0xD887; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER0_CFG = 0xDD4C; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER1_CFG = 0xDD4D; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER2_CFG = 0xDD4E; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER3_CFG = 0xDD4F; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER4_CFG = 0xDD50; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER5_CFG = 0xDD51; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER6_CFG = 0xDD52; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER7_CFG = 0xDD53; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER_HI = 0xD509; - constexpr unsigned int mmMC_VM_L2_PERFCOUNTER_LO = 0xD508; - constexpr unsigned int 
mmMC_VM_L2_PERFCOUNTER_RSLT_CNTL = 0xDD54; - constexpr unsigned int mmPA_SU_PERFCOUNTER3_SELECT = 0xD905; - constexpr unsigned int mmPA_UTCL1_CNTL1 = 0x22F9; - constexpr unsigned int mmPA_UTCL1_CNTL2 = 0x22FA; - constexpr unsigned int mmRLC_PERFMON_CLK_CNTL = 0xDCBF; - constexpr unsigned int mmRLC_SPM_CB_PERFMON_SAMPLE_DELAY = 0xDC8A; - constexpr unsigned int mmRLC_SPM_CPC_PERFMON_SAMPLE_DELAY = 0xDC88; - constexpr unsigned int mmRLC_SPM_CPF_PERFMON_SAMPLE_DELAY = 0xDC89; - constexpr unsigned int mmRLC_SPM_CPG_PERFMON_SAMPLE_DELAY = 0xDC87; - constexpr unsigned int mmRLC_SPM_DB_PERFMON_SAMPLE_DELAY = 0xDC8B; - constexpr unsigned int mmRLC_SPM_GDS_PERFMON_SAMPLE_DELAY = 0xDC8D; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_ADDR = 0xDC9B; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_DATA = 0xDC9C; - constexpr unsigned int mmRLC_SPM_IA_PERFMON_SAMPLE_DELAY = 0xDC8E; - constexpr unsigned int mmRLC_SPM_PA_PERFMON_SAMPLE_DELAY = 0xDC8C; - constexpr unsigned int mmRLC_SPM_RING_RDPTR = 0xDC9D; - constexpr unsigned int mmRLC_SPM_RMI_PERFMON_SAMPLE_DELAY = 0xDCA3; - constexpr unsigned int mmRLC_SPM_SC_PERFMON_SAMPLE_DELAY = 0xDC90; - constexpr unsigned int mmRLC_SPM_SEGMENT_THRESHOLD = 0xDC9E; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_ADDR = 0xDC85; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_DATA = 0xDC86; - constexpr unsigned int mmRLC_SPM_SPI_PERFMON_SAMPLE_DELAY = 0xDC97; - constexpr unsigned int mmRLC_SPM_SQG_PERFMON_SAMPLE_DELAY = 0xDC98; - constexpr unsigned int mmRLC_SPM_SX_PERFMON_SAMPLE_DELAY = 0xDC9A; - constexpr unsigned int mmRLC_SPM_TA_PERFMON_SAMPLE_DELAY = 0xDC94; - constexpr unsigned int mmRLC_SPM_TCA_PERFMON_SAMPLE_DELAY = 0xDC92; - constexpr unsigned int mmRLC_SPM_TCC_PERFMON_SAMPLE_DELAY = 0xDC91; - constexpr unsigned int mmRLC_SPM_TCP_PERFMON_SAMPLE_DELAY = 0xDC93; - constexpr unsigned int mmRLC_SPM_TD_PERFMON_SAMPLE_DELAY = 0xDC95; - constexpr unsigned int mmRLC_SPM_VGT_PERFMON_SAMPLE_DELAY = 0xDC96; - constexpr unsigned int 
mmSDMA0_PERFCOUNTER0_RESULT = 0x12B8; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_RESULT = 0x12B9; - constexpr unsigned int mmSDMA0_PERFCOUNTER_TAG_DELAY_RANGE = 0x12BA; - constexpr unsigned int mmSDMA0_PERFMON_CNTL = 0x12B7; - constexpr unsigned int mmSPI_CONFIG_PS_CU_EN = 0x2452; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_4 = 0x24E0; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_5 = 0x24E1; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_6 = 0x24E2; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_7 = 0x24E3; - constexpr unsigned int mmSPI_LB_CU_MASK = 0x24D5; - constexpr unsigned int mmSPI_LB_DATA_PERCU_WAVE_CS = 0x24E7; - constexpr unsigned int mmSPI_LB_DATA_PERCU_WAVE_HSGS = 0x24E5; - constexpr unsigned int mmSPI_LB_DATA_PERCU_WAVE_VSPS = 0x24E6; - constexpr unsigned int mmSPI_PG_ENABLE_STATIC_CU_MASK = 0x24D7; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_8 = 0x31E4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_9 = 0x31E5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_10 = 0x31F0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_11 = 0x31F1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_12 = 0x31F4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_13 = 0x31F5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_14 = 0x31F6; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_15 = 0x31F7; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; 
- constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_8 = 0x31EE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_9 = 0x31EF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_10 = 0x31F2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_11 = 0x31F3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_12 = 0x31F8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_13 = 0x31F9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_14 = 0x31FA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_15 = 0x31FB; - constexpr unsigned int mmSPI_SHADER_PGM_HI_ES = 0x2C85; - constexpr unsigned int mmSPI_SHADER_PGM_HI_LS = 0x2D05; - constexpr unsigned int mmSPI_SHADER_PGM_LO_ES = 0x2C84; - constexpr unsigned int mmSPI_SHADER_PGM_LO_LS = 0x2D04; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_GS_VS = 0x2C7C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_0 = 0x2D4C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_1 = 0x2D4D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_2 = 0x2D4E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_3 = 0x2D4F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_4 = 0x2D50; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_5 = 0x2D51; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_6 = 0x2D52; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_7 = 0x2D53; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_8 = 0x2D54; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_9 = 0x2D55; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_10 = 0x2D56; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_11 = 0x2D57; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_12 = 0x2D58; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_13 = 0x2D59; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_14 = 0x2D5A; - constexpr unsigned int 
mmSPI_SHADER_USER_DATA_COMMON_15 = 0x2D5B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_16 = 0x2D5C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_17 = 0x2D5D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_18 = 0x2D5E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_19 = 0x2D5F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_20 = 0x2D60; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_21 = 0x2D61; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_22 = 0x2D62; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_23 = 0x2D63; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_24 = 0x2D64; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_25 = 0x2D65; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_26 = 0x2D66; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_27 = 0x2D67; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_28 = 0x2D68; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_29 = 0x2D69; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_30 = 0x2D6A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_COMMON_31 = 0x2D6B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_0 = 0x2CCC; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_1 = 0x2CCD; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_2 = 0x2CCE; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_3 = 0x2CCF; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_4 = 0x2CD0; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_5 = 0x2CD1; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_6 = 0x2CD2; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_7 = 0x2CD3; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_8 = 0x2CD4; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_9 = 0x2CD5; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_10 = 0x2CD6; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_11 = 0x2CD7; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_12 = 0x2CD8; - constexpr unsigned int 
mmSPI_SHADER_USER_DATA_ES_13 = 0x2CD9; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_14 = 0x2CDA; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_15 = 0x2CDB; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_16 = 0x2CDC; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_17 = 0x2CDD; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_18 = 0x2CDE; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_19 = 0x2CDF; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_20 = 0x2CE0; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_21 = 0x2CE1; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_22 = 0x2CE2; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_23 = 0x2CE3; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_24 = 0x2CE4; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_25 = 0x2CE5; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_26 = 0x2CE6; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_27 = 0x2CE7; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_28 = 0x2CE8; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_29 = 0x2CE9; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_30 = 0x2CEA; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_31 = 0x2CEB; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_0 = 0x2D0C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_1 = 0x2D0D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_2 = 0x2D0E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_3 = 0x2D0F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_4 = 0x2D10; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_5 = 0x2D11; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_6 = 0x2D12; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_7 = 0x2D13; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_8 = 0x2D14; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_9 = 0x2D15; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_10 = 0x2D16; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_11 = 0x2D17; - constexpr unsigned int 
mmSPI_SHADER_USER_DATA_LS_12 = 0x2D18; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_13 = 0x2D19; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_14 = 0x2D1A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_15 = 0x2D1B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_16 = 0x2D1C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_17 = 0x2D1D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_18 = 0x2D1E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_19 = 0x2D1F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_20 = 0x2D20; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_21 = 0x2D21; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_22 = 0x2D22; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_23 = 0x2D23; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_24 = 0x2D24; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_25 = 0x2D25; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_26 = 0x2D26; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_27 = 0x2D27; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_28 = 0x2D28; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_29 = 0x2D29; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_30 = 0x2D2A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_31 = 0x2D2B; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_6 = 0x24B1; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_7 = 0x24B2; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_8 = 0x24B3; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_9 = 0x24B4; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_3 = 0x24B8; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_5 = 0x24BA; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_10 = 0x24BF; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_12 = 0x24C1; - constexpr unsigned int mmSQ_BUF_RSRC_WORD0 = 0x23C0; - constexpr unsigned int mmSQ_BUF_RSRC_WORD1 = 0x23C1; - constexpr unsigned int mmSQ_BUF_RSRC_WORD2 = 0x23C2; - constexpr unsigned int mmSQ_BUF_RSRC_WORD3 = 0x23C3; - constexpr unsigned int 
mmSQ_CMD_TIMESTAMP = 0x2375; - constexpr unsigned int mmSQ_DS_0 = 0x237F; - constexpr unsigned int mmSQ_DS_1 = 0x237F; - constexpr unsigned int mmSQ_EDC_CNT = 0x23A6; - constexpr unsigned int mmSQ_EDC_DED_CNT = 0x23A4; - constexpr unsigned int mmSQ_EDC_FUE_CNTL = 0x23A7; - constexpr unsigned int mmSQ_EDC_INFO = 0x23A5; - constexpr unsigned int mmSQ_EDC_SEC_CNT = 0x23A3; - constexpr unsigned int mmSQ_EXP_0 = 0x237F; - constexpr unsigned int mmSQ_EXP_1 = 0x237F; - constexpr unsigned int mmSQ_FLAT_0 = 0x237F; - constexpr unsigned int mmSQ_FLAT_1 = 0x237F; - constexpr unsigned int mmSQ_FLAT_SCRATCH_WORD0 = 0x23D0; - constexpr unsigned int mmSQ_FLAT_SCRATCH_WORD1 = 0x23D1; - constexpr unsigned int mmSQ_GLBL_0 = 0x237F; - constexpr unsigned int mmSQ_GLBL_1 = 0x237F; - constexpr unsigned int mmSQ_IMG_RSRC_WORD0 = 0x23C4; - constexpr unsigned int mmSQ_IMG_RSRC_WORD1 = 0x23C5; - constexpr unsigned int mmSQ_IMG_RSRC_WORD2 = 0x23C6; - constexpr unsigned int mmSQ_IMG_RSRC_WORD3 = 0x23C7; - constexpr unsigned int mmSQ_IMG_RSRC_WORD4 = 0x23C8; - constexpr unsigned int mmSQ_IMG_RSRC_WORD5 = 0x23C9; - constexpr unsigned int mmSQ_IMG_RSRC_WORD6 = 0x23CA; - constexpr unsigned int mmSQ_IMG_RSRC_WORD7 = 0x23CB; - constexpr unsigned int mmSQ_IMG_SAMP_WORD0 = 0x23CC; - constexpr unsigned int mmSQ_IMG_SAMP_WORD1 = 0x23CD; - constexpr unsigned int mmSQ_IMG_SAMP_WORD2 = 0x23CE; - constexpr unsigned int mmSQ_IMG_SAMP_WORD3 = 0x23CF; - constexpr unsigned int mmSQ_INST = 0x237F; - constexpr unsigned int mmSQ_INTERRUPT_AUTO_MASK = 0x2314; - constexpr unsigned int mmSQ_INTERRUPT_MSG_CTRL = 0x2315; - constexpr unsigned int mmSQ_LB_CTR0_CU = 0x239E; - constexpr unsigned int mmSQ_LB_CTR1_CU = 0x239F; - constexpr unsigned int mmSQ_LB_CTR2_CU = 0x23A0; - constexpr unsigned int mmSQ_LB_CTR3_CU = 0x23A1; - constexpr unsigned int mmSQ_LB_CTR_SEL = 0x239D; - constexpr unsigned int mmSQ_M0_GPR_IDX_WORD = 0x23D2; - constexpr unsigned int mmSQ_MIMG_0 = 0x237F; - constexpr unsigned int mmSQ_MIMG_1 = 0x237F; 
- constexpr unsigned int mmSQ_MTBUF_0 = 0x237F; - constexpr unsigned int mmSQ_MTBUF_1 = 0x237F; - constexpr unsigned int mmSQ_MUBUF_0 = 0x237F; - constexpr unsigned int mmSQ_MUBUF_1 = 0x237F; - constexpr unsigned int mmSQ_PERFCOUNTER_MASK = 0xD9E1; - constexpr unsigned int mmSQ_POWER_THROTTLE = 0xF091; - constexpr unsigned int mmSQ_POWER_THROTTLE2 = 0xF092; - constexpr unsigned int mmSQ_REG_CREDITS = 0x2304; - constexpr unsigned int mmSQ_REG_TIMESTAMP = 0x2374; - constexpr unsigned int mmSQ_SCRATCH_0 = 0x237F; - constexpr unsigned int mmSQ_SCRATCH_1 = 0x237F; - constexpr unsigned int mmSQ_SHADER_TBA_HI = 0x231D; - constexpr unsigned int mmSQ_SHADER_TBA_LO = 0x231C; - constexpr unsigned int mmSQ_SHADER_TMA_HI = 0x231F; - constexpr unsigned int mmSQ_SHADER_TMA_LO = 0x231E; - constexpr unsigned int mmSQ_SMEM_0 = 0x237F; - constexpr unsigned int mmSQ_SMEM_1 = 0x237F; - constexpr unsigned int mmSQ_SOP1 = 0x237F; - constexpr unsigned int mmSQ_SOP2 = 0x237F; - constexpr unsigned int mmSQ_SOPC = 0x237F; - constexpr unsigned int mmSQ_SOPK = 0x237F; - constexpr unsigned int mmSQ_SOPP = 0x237F; - constexpr unsigned int mmSQ_THREAD_TRACE_BASE = 0xC330; - constexpr unsigned int mmSQ_THREAD_TRACE_BASE2 = 0xC337; - constexpr unsigned int mmSQ_THREAD_TRACE_CNTR = 0xC33C; - constexpr unsigned int mmSQ_THREAD_TRACE_CTRL = 0xC335; - constexpr unsigned int mmSQ_THREAD_TRACE_HIWATER = 0xC33B; - constexpr unsigned int mmSQ_THREAD_TRACE_MASK = 0xC332; - constexpr unsigned int mmSQ_THREAD_TRACE_MODE = 0xC336; - constexpr unsigned int mmSQ_THREAD_TRACE_PERF_MASK = 0xC334; - constexpr unsigned int mmSQ_THREAD_TRACE_SIZE = 0xC331; - constexpr unsigned int mmSQ_THREAD_TRACE_STATUS = 0xC33A; - constexpr unsigned int mmSQ_THREAD_TRACE_TOKEN_MASK = 0xC333; - constexpr unsigned int mmSQ_THREAD_TRACE_TOKEN_MASK2 = 0xC338; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_CMN = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_EVENT = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_INST 
= 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_INST_PC_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_INST_PC_2_OF_2 = 0x23B1; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2 = 0x23B1; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_ISSUE = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_MISC = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_PERF_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_PERF_2_OF_2 = 0x23B1; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_REG_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_REG_2_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_REG_CS_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_REG_CS_2_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2 = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2 = 0x23B1; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_WAVE = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WORD_WAVE_START = 0x23B0; - constexpr unsigned int mmSQ_THREAD_TRACE_WPTR = 0xC339; - constexpr unsigned int mmSQ_TIME_HI = 0x237C; - constexpr unsigned int mmSQ_TIME_LO = 0x237D; - constexpr unsigned int mmSQ_UTCL1_CNTL1 = 0x2317; - constexpr unsigned int mmSQ_UTCL1_CNTL2 = 0x2318; - constexpr unsigned int mmSQ_UTCL1_STATUS = 0x2319; - constexpr unsigned int mmSQ_VINTRP = 0x237F; - constexpr unsigned int mmSQ_VOP1 = 0x237F; - constexpr unsigned int mmSQ_VOP2 = 0x237F; - constexpr unsigned int mmSQ_VOP3P_0 = 0x237F; - constexpr unsigned int mmSQ_VOP3P_1 = 0x237F; - constexpr unsigned int mmSQ_VOP3_0 = 0x237F; - constexpr unsigned int mmSQ_VOP3_0_SDST_ENC = 0x237F; - constexpr unsigned int mmSQ_VOP3_1 = 0x237F; - constexpr unsigned int mmSQ_VOPC = 0x237F; - constexpr unsigned int mmSQ_VOP_DPP = 0x237F; - constexpr unsigned int mmSQ_VOP_SDWA = 0x237F; - constexpr 
unsigned int mmSQ_VOP_SDWA_SDST_ENC = 0x237F; - constexpr unsigned int mmSQ_WREXEC_EXEC_HI = 0x23B1; - constexpr unsigned int mmSQ_WREXEC_EXEC_LO = 0x23B1; - constexpr unsigned int mmTA_RESERVED_010C = 0x2543; - constexpr unsigned int mmTCA_PERFCOUNTER0_HI = 0xD391; - constexpr unsigned int mmTCA_PERFCOUNTER0_LO = 0xD390; - constexpr unsigned int mmTCA_PERFCOUNTER0_SELECT = 0xDB90; - constexpr unsigned int mmTCA_PERFCOUNTER0_SELECT1 = 0xDB91; - constexpr unsigned int mmTCA_PERFCOUNTER1_HI = 0xD393; - constexpr unsigned int mmTCA_PERFCOUNTER1_LO = 0xD392; - constexpr unsigned int mmTCA_PERFCOUNTER1_SELECT = 0xDB92; - constexpr unsigned int mmTCA_PERFCOUNTER1_SELECT1 = 0xDB93; - constexpr unsigned int mmTCA_PERFCOUNTER2_HI = 0xD395; - constexpr unsigned int mmTCA_PERFCOUNTER2_LO = 0xD394; - constexpr unsigned int mmTCA_PERFCOUNTER2_SELECT = 0xDB94; - constexpr unsigned int mmTCA_PERFCOUNTER3_HI = 0xD397; - constexpr unsigned int mmTCA_PERFCOUNTER3_LO = 0xD396; - constexpr unsigned int mmTCA_PERFCOUNTER3_SELECT = 0xDB95; - constexpr unsigned int mmTCC_PERFCOUNTER0_HI = 0xD381; - constexpr unsigned int mmTCC_PERFCOUNTER0_LO = 0xD380; - constexpr unsigned int mmTCC_PERFCOUNTER0_SELECT = 0xDB80; - constexpr unsigned int mmTCC_PERFCOUNTER0_SELECT1 = 0xDB81; - constexpr unsigned int mmTCC_PERFCOUNTER1_HI = 0xD383; - constexpr unsigned int mmTCC_PERFCOUNTER1_LO = 0xD382; - constexpr unsigned int mmTCC_PERFCOUNTER1_SELECT = 0xDB82; - constexpr unsigned int mmTCC_PERFCOUNTER1_SELECT1 = 0xDB83; - constexpr unsigned int mmTCC_PERFCOUNTER2_HI = 0xD385; - constexpr unsigned int mmTCC_PERFCOUNTER2_LO = 0xD384; - constexpr unsigned int mmTCC_PERFCOUNTER2_SELECT = 0xDB84; - constexpr unsigned int mmTCC_PERFCOUNTER3_HI = 0xD387; - constexpr unsigned int mmTCC_PERFCOUNTER3_LO = 0xD386; - constexpr unsigned int mmTCC_PERFCOUNTER3_SELECT = 0xDB85; - constexpr unsigned int mmUMCCH0_PerfMonCtl1 = 0x14341; - constexpr unsigned int mmUMCCH0_PerfMonCtl2 = 0x14342; - constexpr unsigned int 
mmUMCCH0_PerfMonCtl3 = 0x14343; - constexpr unsigned int mmUMCCH0_PerfMonCtl4 = 0x14344; - constexpr unsigned int mmUMCCH0_PerfMonCtl5 = 0x14345; - constexpr unsigned int mmUMCCH0_PerfMonCtlClk = 0x14340; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmVGT_CACHE_INVALIDATION = 0x2231; - constexpr unsigned int mmVGT_CNTL_STATUS = 0x223C; - constexpr unsigned int mmVGT_DISPATCH_DRAW_INDEX = 0xA2DD; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0xC241; - constexpr unsigned int mmVGT_GS_MAX_PRIMS_PER_SUBGROUP = 0xA2A5; - constexpr unsigned int mmVGT_INDX_OFFSET = 0xC24A; - constexpr unsigned int mmVGT_MAX_VTX_INDX = 0xC248; - constexpr unsigned int mmVGT_MIN_VTX_INDX = 0xC249; - constexpr unsigned int mmVGT_MULTI_PRIM_IB_RESET_EN = 0xC24B; - constexpr unsigned int mmVGT_PERFCOUNTER0_HI = 0xD091; - constexpr unsigned int mmVGT_PERFCOUNTER0_LO = 0xD090; - constexpr unsigned int mmVGT_PERFCOUNTER0_SELECT = 0xD88C; - constexpr unsigned int mmVGT_PERFCOUNTER0_SELECT1 = 0xD890; - constexpr unsigned int mmVGT_PERFCOUNTER1_HI = 0xD093; - constexpr unsigned int mmVGT_PERFCOUNTER1_LO = 0xD092; - constexpr unsigned int mmVGT_PERFCOUNTER1_SELECT = 0xD88D; - constexpr unsigned int mmVGT_PERFCOUNTER1_SELECT1 = 0xD891; - constexpr unsigned int mmVGT_PERFCOUNTER2_HI = 0xD095; - constexpr 
unsigned int mmVGT_PERFCOUNTER2_LO = 0xD094; - constexpr unsigned int mmVGT_PERFCOUNTER2_SELECT = 0xD88E; - constexpr unsigned int mmVGT_PERFCOUNTER3_HI = 0xD097; - constexpr unsigned int mmVGT_PERFCOUNTER3_LO = 0xD096; - constexpr unsigned int mmVGT_PERFCOUNTER3_SELECT = 0xD88F; - constexpr unsigned int mmVGT_PERFCOUNTER_SEID_MASK = 0xD894; - constexpr unsigned int mmVGT_STRMOUT_DELAY = 0x2233; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0xC251; - constexpr unsigned int mmWD_PERFCOUNTER0_HI = 0xD081; - constexpr unsigned int mmWD_PERFCOUNTER0_LO = 0xD080; - constexpr unsigned int mmWD_PERFCOUNTER0_SELECT = 0xD880; - constexpr unsigned int mmWD_PERFCOUNTER1_HI = 0xD083; - constexpr unsigned int mmWD_PERFCOUNTER1_LO = 0xD082; - constexpr unsigned int mmWD_PERFCOUNTER1_SELECT = 0xD881; - constexpr unsigned int mmWD_PERFCOUNTER2_HI = 0xD085; - constexpr unsigned int mmWD_PERFCOUNTER2_LO = 0xD084; - constexpr unsigned int mmWD_PERFCOUNTER2_SELECT = 0xD882; - constexpr unsigned int mmWD_PERFCOUNTER3_HI = 0xD087; - constexpr unsigned int mmWD_PERFCOUNTER3_LO = 0xD086; - constexpr unsigned int mmWD_PERFCOUNTER3_SELECT = 0xD883; -} // namespace Gfx09 - -namespace Gfx09_0 -{ - constexpr unsigned int mmGCEA_PERFCOUNTER0_CFG = 0x2AF9; - constexpr unsigned int mmGCEA_PERFCOUNTER1_CFG = 0x2AFA; - constexpr unsigned int mmGCEA_PERFCOUNTER_HI = 0x2AF8; - constexpr unsigned int mmGCEA_PERFCOUNTER_LO = 0x2AF7; - constexpr unsigned int mmGCEA_PERFCOUNTER_RSLT_CNTL = 0x2AFB; -} // namespace Gfx09_0 - -namespace Gfx09_10 -{ - constexpr unsigned int mmCB_COLOR0_CLEAR_WORD0 = 0xA323; - constexpr unsigned int mmCB_COLOR0_CLEAR_WORD1 = 0xA324; - constexpr unsigned int mmCB_COLOR0_CMASK = 0xA31F; - constexpr unsigned int mmCB_COLOR0_FMASK = 0xA321; - constexpr unsigned int mmCB_COLOR1_CLEAR_WORD0 = 0xA332; - constexpr unsigned int mmCB_COLOR1_CLEAR_WORD1 = 0xA333; - constexpr unsigned int mmCB_COLOR1_CMASK = 0xA32E; - constexpr unsigned int mmCB_COLOR1_FMASK = 0xA330; - constexpr 
unsigned int mmCB_COLOR2_CLEAR_WORD0 = 0xA341; - constexpr unsigned int mmCB_COLOR2_CLEAR_WORD1 = 0xA342; - constexpr unsigned int mmCB_COLOR2_CMASK = 0xA33D; - constexpr unsigned int mmCB_COLOR2_FMASK = 0xA33F; - constexpr unsigned int mmCB_COLOR3_CLEAR_WORD0 = 0xA350; - constexpr unsigned int mmCB_COLOR3_CLEAR_WORD1 = 0xA351; - constexpr unsigned int mmCB_COLOR3_CMASK = 0xA34C; - constexpr unsigned int mmCB_COLOR3_FMASK = 0xA34E; - constexpr unsigned int mmCB_COLOR4_CLEAR_WORD0 = 0xA35F; - constexpr unsigned int mmCB_COLOR4_CLEAR_WORD1 = 0xA360; - constexpr unsigned int mmCB_COLOR4_CMASK = 0xA35B; - constexpr unsigned int mmCB_COLOR4_FMASK = 0xA35D; - constexpr unsigned int mmCB_COLOR5_CLEAR_WORD0 = 0xA36E; - constexpr unsigned int mmCB_COLOR5_CLEAR_WORD1 = 0xA36F; - constexpr unsigned int mmCB_COLOR5_CMASK = 0xA36A; - constexpr unsigned int mmCB_COLOR5_FMASK = 0xA36C; - constexpr unsigned int mmCB_COLOR6_CLEAR_WORD0 = 0xA37D; - constexpr unsigned int mmCB_COLOR6_CLEAR_WORD1 = 0xA37E; - constexpr unsigned int mmCB_COLOR6_CMASK = 0xA379; - constexpr unsigned int mmCB_COLOR6_FMASK = 0xA37B; - constexpr unsigned int mmCB_COLOR7_CLEAR_WORD0 = 0xA38C; - constexpr unsigned int mmCB_COLOR7_CLEAR_WORD1 = 0xA38D; - constexpr unsigned int mmCB_COLOR7_CMASK = 0xA388; - constexpr unsigned int mmCB_COLOR7_FMASK = 0xA38A; - constexpr unsigned int mmCB_DCC_CONTROL = 0xA109; - constexpr unsigned int mmCP_COHER_BASE = 0xC07E; - constexpr unsigned int mmCP_COHER_BASE_HI = 0xC079; - constexpr unsigned int mmCP_COHER_CNTL = 0xC07C; - constexpr unsigned int mmCP_COHER_SIZE = 0xC07D; - constexpr unsigned int mmCP_COHER_SIZE_HI = 0xC08C; - constexpr unsigned int mmCP_COHER_START_DELAY = 0xC07B; - constexpr unsigned int mmCP_COHER_STATUS = 0xC07F; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT0_HI = 0xC00B; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT0_LO = 0xC00A; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT1_HI = 0xC00F; - constexpr unsigned int 
mmCP_NUM_PRIM_NEEDED_COUNT1_LO = 0xC00E; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT2_HI = 0xC013; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT2_LO = 0xC012; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT3_HI = 0xC017; - constexpr unsigned int mmCP_NUM_PRIM_NEEDED_COUNT3_LO = 0xC016; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT0_HI = 0xC009; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT0_LO = 0xC008; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT1_HI = 0xC00D; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT1_LO = 0xC00C; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT2_HI = 0xC011; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT2_LO = 0xC010; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT3_HI = 0xC015; - constexpr unsigned int mmCP_NUM_PRIM_WRITTEN_COUNT3_LO = 0xC014; - constexpr unsigned int mmCP_STRMOUT_CNTL = 0xC03F; - constexpr unsigned int mmDB_ZPASS_COUNT_HI = 0xC3FF; - constexpr unsigned int mmDB_ZPASS_COUNT_LOW = 0xC3FE; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_0 = 0x22CC; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_1 = 0x22CD; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_2 = 0x22CE; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_3 = 0x22CF; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_0 = 0x22D1; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_1 = 0x22D2; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_2 = 0x22D3; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_3 = 0x22D4; - constexpr unsigned int mmPA_SC_BINNER_TIMEOUT_COUNTER = 0x22D0; - constexpr unsigned int mmPA_SC_DSM_CNTL = 0x22FE; - constexpr unsigned int mmPA_SC_ENHANCE = 0x22FC; - constexpr unsigned int mmPA_SC_ENHANCE_1 = 0x22FD; - constexpr unsigned int mmPA_SC_FIFO_SIZE = 0x22F3; - constexpr unsigned int mmPA_SC_FORCE_EOV_MAX_CNTS = 0x22C9; - constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_HV_LOCK = 0x22C1; - constexpr unsigned int mmPA_SC_IF_FIFO_SIZE = 0x22F5; - constexpr unsigned int 
mmPA_SC_P3D_TRAP_SCREEN_HV_LOCK = 0x22C0; - constexpr unsigned int mmPA_SC_PKR_WAVE_TABLE_CNTL = 0x22F8; - constexpr unsigned int mmPA_SC_TILE_STEERING_CREST_OVERRIDE = 0x22FF; - constexpr unsigned int mmPA_SC_TRAP_SCREEN_HV_LOCK = 0x22C2; - constexpr unsigned int mmPA_SIDEBAND_REQUEST_DELAYS = 0x22FB; - constexpr unsigned int mmRLC_SPM_INT_CNTL = 0xEC72; - constexpr unsigned int mmRLC_SPM_INT_STATUS = 0xEC73; - constexpr unsigned int mmRLC_SPM_MC_CNTL = 0xEC71; - constexpr unsigned int mmRLC_SPM_PERFMON_SEGMENT_SIZE = 0xDC84; - constexpr unsigned int mmSPI_ARB_CNTL_0 = 0x31FD; - constexpr unsigned int mmSPI_COMPUTE_QUEUE_RESET = 0x31DB; - constexpr unsigned int mmSPI_COMPUTE_WF_CTX_SAVE = 0x31FC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_0 = 0x31DC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_1 = 0x31DD; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_0 = 0x31E6; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_1 = 0x31E7; - constexpr unsigned int mmSPI_START_PHASE = 0x243B; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_1 = 0x24B6; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_8 = 0x24BD; - constexpr unsigned int mmSQ_LB_CTR_CTRL = 0x2398; - constexpr unsigned int mmSQ_LB_DATA0 = 0x2399; - constexpr unsigned int mmSQ_LB_DATA1 = 0x239A; - constexpr unsigned int mmSQ_LB_DATA2 = 0x239B; - constexpr unsigned int mmSQ_LB_DATA3 = 0x239C; - constexpr unsigned int mmSQ_PERFCOUNTER0_HI = 0xD1C1; - constexpr unsigned int mmSQ_PERFCOUNTER1_HI = 0xD1C3; - constexpr unsigned int mmSQ_PERFCOUNTER2_HI = 0xD1C5; - constexpr unsigned int mmSQ_PERFCOUNTER3_HI = 0xD1C7; - constexpr unsigned int mmSQ_PERFCOUNTER4_HI = 0xD1C9; - constexpr unsigned int mmSQ_PERFCOUNTER5_HI = 0xD1CB; - constexpr unsigned int mmSQ_PERFCOUNTER6_HI = 0xD1CD; - constexpr unsigned int mmSQ_PERFCOUNTER7_HI = 0xD1CF; - constexpr unsigned int mmSQ_PERFCOUNTER8_HI = 0xD1D1; - constexpr unsigned int mmSQ_PERFCOUNTER8_LO = 0xD1D0; - constexpr unsigned int mmSQ_PERFCOUNTER9_HI = 
0xD1D3; - constexpr unsigned int mmSQ_PERFCOUNTER9_LO = 0xD1D2; - constexpr unsigned int mmSQ_PERFCOUNTER10_HI = 0xD1D5; - constexpr unsigned int mmSQ_PERFCOUNTER10_LO = 0xD1D4; - constexpr unsigned int mmSQ_PERFCOUNTER11_HI = 0xD1D7; - constexpr unsigned int mmSQ_PERFCOUNTER11_LO = 0xD1D6; - constexpr unsigned int mmSQ_PERFCOUNTER12_HI = 0xD1D9; - constexpr unsigned int mmSQ_PERFCOUNTER12_LO = 0xD1D8; - constexpr unsigned int mmSQ_PERFCOUNTER13_HI = 0xD1DB; - constexpr unsigned int mmSQ_PERFCOUNTER13_LO = 0xD1DA; - constexpr unsigned int mmSQ_PERFCOUNTER14_HI = 0xD1DD; - constexpr unsigned int mmSQ_PERFCOUNTER14_LO = 0xD1DC; - constexpr unsigned int mmSQ_PERFCOUNTER15_HI = 0xD1DF; - constexpr unsigned int mmSQ_PERFCOUNTER15_LO = 0xD1DE; - constexpr unsigned int mmSQ_RUNTIME_CONFIG = 0x2308; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER = 0x32B9; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER_EN = 0x32BA; - constexpr unsigned int mmVGT_ES_PER_GS = 0xA296; - constexpr unsigned int mmVGT_FIFO_DEPTHS = 0x2234; - constexpr unsigned int mmVGT_GROUP_DECR = 0xA28B; - constexpr unsigned int mmVGT_GROUP_FIRST_DECR = 0xA28A; - constexpr unsigned int mmVGT_GROUP_PRIM_TYPE = 0xA289; - constexpr unsigned int mmVGT_GROUP_VECT_0_CNTL = 0xA28C; - constexpr unsigned int mmVGT_GROUP_VECT_0_FMT_CNTL = 0xA28E; - constexpr unsigned int mmVGT_GROUP_VECT_1_CNTL = 0xA28D; - constexpr unsigned int mmVGT_GROUP_VECT_1_FMT_CNTL = 0xA28F; - constexpr unsigned int mmVGT_GS_ONCHIP_CNTL = 0xA291; - constexpr unsigned int mmVGT_GS_OUT_PRIM_TYPE = 0xA29B; - constexpr unsigned int mmVGT_HOS_CNTL = 0xA285; - constexpr unsigned int mmVGT_HOS_REUSE_DEPTH = 0xA288; - constexpr unsigned int mmVGT_IMMED_DATA = 0xA1FD; - constexpr unsigned int mmVGT_INSTANCE_STEP_RATE_0 = 0xA2A8; - constexpr unsigned int mmVGT_INSTANCE_STEP_RATE_1 = 0xA2A9; - constexpr unsigned int mmVGT_LAST_COPY_STATE = 0x2230; - constexpr unsigned int mmVGT_OUTPUT_PATH_CNTL = 0xA284; - constexpr unsigned int mmVGT_VS_MAX_WAVE_ID 
= 0x2268; - constexpr unsigned int mmVGT_VTX_CNT_EN = 0xA2AE; - constexpr unsigned int mmVGT_VTX_VECT_EJECT_REG = 0x222C; - constexpr unsigned int mmWD_BUF_RESOURCE_1 = 0x2276; - constexpr unsigned int mmWD_BUF_RESOURCE_2 = 0x2277; - constexpr unsigned int mmWD_CNTL_SB_BUF_BASE = 0xC254; - constexpr unsigned int mmWD_CNTL_SB_BUF_BASE_HI = 0xC255; - constexpr unsigned int mmWD_INDEX_BUF_BASE = 0xC256; - constexpr unsigned int mmWD_INDEX_BUF_BASE_HI = 0xC257; - constexpr unsigned int mmWD_POS_BUF_BASE = 0xC252; - constexpr unsigned int mmWD_POS_BUF_BASE_HI = 0xC253; -} // namespace Gfx09_10 - -namespace Gfx09_1x -{ - constexpr unsigned int mmCOMPUTE_SHADER_CHKSUM = 0x2E25; - constexpr unsigned int mmGCEA_PERFCOUNTER0_CFG = 0x2AFE; - constexpr unsigned int mmGCEA_PERFCOUNTER1_CFG = 0x2AFF; - constexpr unsigned int mmGCEA_PERFCOUNTER_HI = 0x2AFD; - constexpr unsigned int mmGCEA_PERFCOUNTER_LO = 0x2AFC; - constexpr unsigned int mmGCEA_PERFCOUNTER_RSLT_CNTL = 0x2700; - constexpr unsigned int mmIA_MULTI_VGT_PARAM_BC = 0xA2AA; - constexpr unsigned int mmRLC_SPM_PERFMON_SAMPLE_DELAY_MAX = 0xDCA4; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0xC443; -} // namespace Gfx09_1x - -namespace Gfx09_1xPlus -{ - constexpr unsigned int mmCOMPUTE_DISPATCH_END = 0x2E7E; - constexpr unsigned int mmPA_CL_PROG_NEAR_CLIP_Z = 0xA187; -} // namespace Gfx09_1xPlus - -namespace Gfx10 -{ - constexpr unsigned int mmCB_COLOR0_CMASK_BASE_EXT = 0xA398; - constexpr unsigned int mmCB_COLOR0_FMASK_BASE_EXT = 0xA3A0; - constexpr unsigned int mmCB_COLOR1_CMASK_BASE_EXT = 0xA399; - constexpr unsigned int mmCB_COLOR1_FMASK_BASE_EXT = 0xA3A1; - constexpr unsigned int mmCB_COLOR2_CMASK_BASE_EXT = 0xA39A; - constexpr unsigned int mmCB_COLOR2_FMASK_BASE_EXT = 0xA3A2; - constexpr unsigned int mmCB_COLOR3_CMASK_BASE_EXT = 0xA39B; - constexpr unsigned int mmCB_COLOR3_FMASK_BASE_EXT = 0xA3A3; - constexpr unsigned int mmCB_COLOR4_CMASK_BASE_EXT = 0xA39C; - constexpr unsigned int mmCB_COLOR4_FMASK_BASE_EXT = 
0xA3A4; - constexpr unsigned int mmCB_COLOR5_CMASK_BASE_EXT = 0xA39D; - constexpr unsigned int mmCB_COLOR5_FMASK_BASE_EXT = 0xA3A5; - constexpr unsigned int mmCB_COLOR6_CMASK_BASE_EXT = 0xA39E; - constexpr unsigned int mmCB_COLOR6_FMASK_BASE_EXT = 0xA3A6; - constexpr unsigned int mmCB_COLOR7_CMASK_BASE_EXT = 0xA39F; - constexpr unsigned int mmCB_COLOR7_FMASK_BASE_EXT = 0xA3A7; - constexpr unsigned int mmCOMPUTE_RELAUNCH = 0x2E2B; - constexpr unsigned int mmCOMPUTE_RELAUNCH2 = 0x2E2E; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_HI = 0x2E2D; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_LO = 0x2E2C; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER0_CFG = 0xDD2C; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER1_CFG = 0xDD2D; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER2_CFG = 0xDD2E; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER3_CFG = 0xDD2F; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER4_CFG = 0xDD30; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER5_CFG = 0xDD31; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER6_CFG = 0xDD32; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER7_CFG = 0xDD33; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_HI = 0xD4E9; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_LO = 0xD4E8; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL = 0xDD34; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_HI = 0xD4FA; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_LO = 0xD4F8; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_MODE = 0xDD40; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_SELECT = 0xDD3C; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_SELECT1 = 0xDD3E; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_HI = 0xD4FB; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_LO = 0xD4F9; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_MODE = 0xDD41; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_SELECT = 0xDD3D; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_SELECT1 = 0xDD3F; - constexpr unsigned 
int mmGE_DMA_FIRST_INDEX = 0xC251; - constexpr unsigned int mmGE_PC_CNTL = 0x2245; - constexpr unsigned int mmIA_MULTI_VGT_PARAM = 0xA2AA; - constexpr unsigned int mmIA_MULTI_VGT_PARAM_PIPED = 0xC258; - constexpr unsigned int mmPA_PH_ENHANCE = 0x22E1; - constexpr unsigned int mmPA_PH_INTERFACE_FIFO_SIZE = 0x22E0; - constexpr unsigned int mmPA_SC_BC_WAVE_BREAK = 0x22E4; - constexpr unsigned int mmPA_SC_BINNER_CNTL_OVERRIDE = 0x22DE; - constexpr unsigned int mmPA_SC_ENHANCE_2 = 0x22DC; - constexpr unsigned int mmPA_SC_PBB_OVERRIDE_FLAG = 0x22DF; - constexpr unsigned int mmRLC_PERFMON_CLK_CNTL = 0xDCE4; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_ADDR = 0xDC97; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_DATA = 0xDC98; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_ADDR = 0xDC95; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_DATA = 0xDC96; - constexpr unsigned int mmRLC_SPM_DESER_START_SKEW = 0xDC8B; - constexpr unsigned int mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR = 0xDC90; - constexpr unsigned int mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA = 0xDC91; - constexpr unsigned int mmRLC_SPM_GLOBALS_MUXSEL_SKEW = 0xDC8D; - constexpr unsigned int mmRLC_SPM_GLOBALS_SAMPLE_SKEW = 0xDC8C; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_ADDR = 0xDC89; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_DATA = 0xDC8A; - constexpr unsigned int mmRLC_SPM_INT_INFO_1 = 0xEC6F; - constexpr unsigned int mmRLC_SPM_INT_INFO_2 = 0xEC70; - constexpr unsigned int mmRLC_SPM_PERFMON_GLB_SEGMENT_SIZE = 0xDCA0; - constexpr unsigned int mmRLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE = 0xDC9F; - constexpr unsigned int mmRLC_SPM_RING_WRPTR = 0xDC94; - constexpr unsigned int mmRLC_SPM_SAMPLE_CNT = 0xED25; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_ADDR = 0xDC87; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_DATA = 0xDC88; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_SKEW = 0xDC8F; - constexpr unsigned int mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR = 0xDC92; - constexpr unsigned int mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA 
= 0xDC93; - constexpr unsigned int mmRLC_SPM_SE_SAMPLE_SKEW = 0xDC8E; - constexpr unsigned int mmRLC_SPM_VIRT_CTRL = 0xDCA1; - constexpr unsigned int mmRLC_SPM_VIRT_STATUS = 0xDCA3; - constexpr unsigned int mmSPI_FEATURE_CTRL = 0x31FE; - constexpr unsigned int mmSPI_LB_DATA_PERWGP_WAVE_VSPS = 0x24E6; - constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_VS = 0x2C45; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_GS_VS = 0x2C7B; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC4_VS = 0x2C41; - constexpr unsigned int mmSPI_SHADER_REQ_CTRL_VS = 0x2C70; - constexpr unsigned int mmSPI_SHADER_RSRC_LIMIT_CTRL = 0x31FF; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_VS_0 = 0x2C72; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_VS_1 = 0x2C73; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_VS_2 = 0x2C74; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_VS_3 = 0x2C75; - constexpr unsigned int mmSQG_UTCL0_CNTL1 = 0x2317; - constexpr unsigned int mmSQG_UTCL0_CNTL2 = 0x2318; - constexpr unsigned int mmSQG_UTCL0_STATUS = 0x2319; - constexpr unsigned int mmSQ_LB_CTR_SEL0 = 0x239D; - constexpr unsigned int mmSQ_LB_CTR_SEL1 = 0x239E; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER2 = 0x32BB; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_HI = 0xD51D; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_LO = 0xD51C; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_SELECT = 0xDD63; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_HI = 0xD51F; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_LO = 0xD51E; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_SELECT = 0xDD64; - constexpr unsigned int mmVGT_INDX_OFFSET = 0xA102; - constexpr unsigned int mmVGT_MAX_VTX_INDX = 0xA100; - constexpr unsigned int mmVGT_MIN_VTX_INDX = 0xA101; - constexpr unsigned int mmVGT_MULTI_PRIM_IB_RESET_EN = 0xA2A5; - constexpr unsigned int mmVGT_STRMOUT_DELAY = 0x2275; -} // namespace Gfx10 - -namespace Gfx101 -{ - constexpr unsigned int mmATC_PERFCOUNTER0_CFG = 0x0C1E; - constexpr unsigned int mmATC_PERFCOUNTER1_CFG = 0x0C1F; - 
constexpr unsigned int mmATC_PERFCOUNTER2_CFG = 0x0C20; - constexpr unsigned int mmATC_PERFCOUNTER3_CFG = 0x0C21; - constexpr unsigned int mmATC_PERFCOUNTER_HI = 0x0C24; - constexpr unsigned int mmATC_PERFCOUNTER_LO = 0x0C23; - constexpr unsigned int mmATC_PERFCOUNTER_RSLT_CNTL = 0x0C22; - constexpr unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmDB_RESERVED_REG_1 = 0xA016; - constexpr unsigned int mmDB_RESERVED_REG_2 = 0xA00F; - constexpr unsigned int mmDB_RESERVED_REG_3 = 0xA017; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int 
mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGCEA_PERFCOUNTER0_CFG = 0x2706; - constexpr unsigned int mmGCEA_PERFCOUNTER1_CFG = 0x2707; - constexpr unsigned int mmGCEA_PERFCOUNTER_HI = 0x2705; - constexpr unsigned int mmGCEA_PERFCOUNTER_LO = 0x2704; - constexpr unsigned int mmGCEA_PERFCOUNTER_RSLT_CNTL = 0x2708; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER0_CFG = 0xDD20; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER1_CFG = 0xDD21; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_HI = 0xD4FD; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_LO = 0xD4FC; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_MODE = 0xDD4E; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT = 0xDD4C; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT1 = 0xDD4D; - constexpr unsigned int 
mmGC_ATC_L2_PERFCOUNTER_HI = 0xD4E1; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_LO = 0xD4E0; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD22; - constexpr unsigned int mmGE_FAST_CLKS = 0x2248; - constexpr unsigned int mmGE_PERFCOUNTER0_HI = 0xD081; - constexpr unsigned int mmGE_PERFCOUNTER0_LO = 0xD080; - constexpr unsigned int mmGE_PERFCOUNTER0_SELECT = 0xD880; - constexpr unsigned int mmGE_PERFCOUNTER0_SELECT1 = 0xD881; - constexpr unsigned int mmGE_PERFCOUNTER1_HI = 0xD083; - constexpr unsigned int mmGE_PERFCOUNTER1_LO = 0xD082; - constexpr unsigned int mmGE_PERFCOUNTER1_SELECT = 0xD882; - constexpr unsigned int mmGE_PERFCOUNTER1_SELECT1 = 0xD883; - constexpr unsigned int mmGE_PERFCOUNTER2_HI = 0xD085; - constexpr unsigned int mmGE_PERFCOUNTER2_LO = 0xD084; - constexpr unsigned int mmGE_PERFCOUNTER2_SELECT = 0xD884; - constexpr unsigned int mmGE_PERFCOUNTER2_SELECT1 = 0xD885; - constexpr unsigned int mmGE_PERFCOUNTER3_HI = 0xD087; - constexpr unsigned int mmGE_PERFCOUNTER3_LO = 0xD086; - constexpr unsigned int mmGE_PERFCOUNTER3_SELECT = 0xD886; - constexpr unsigned int mmGE_PERFCOUNTER3_SELECT1 = 0xD887; - constexpr unsigned int mmGE_PERFCOUNTER4_HI = 0xD089; - constexpr unsigned int mmGE_PERFCOUNTER4_LO = 0xD088; - constexpr unsigned int mmGE_PERFCOUNTER4_SELECT = 0xD888; - constexpr unsigned int mmGE_PERFCOUNTER5_HI = 0xD08B; - constexpr unsigned int mmGE_PERFCOUNTER5_LO = 0xD08A; - constexpr unsigned int mmGE_PERFCOUNTER5_SELECT = 0xD88A; - constexpr unsigned int mmGE_PERFCOUNTER6_HI = 0xD08D; - constexpr unsigned int mmGE_PERFCOUNTER6_LO = 0xD08C; - constexpr unsigned int mmGE_PERFCOUNTER6_SELECT = 0xD88C; - constexpr unsigned int mmGE_PERFCOUNTER7_HI = 0xD08F; - constexpr unsigned int mmGE_PERFCOUNTER7_LO = 0xD08E; - constexpr unsigned int mmGE_PERFCOUNTER7_SELECT = 0xD88E; - constexpr unsigned int mmGE_PERFCOUNTER8_HI = 0xD091; - constexpr unsigned int mmGE_PERFCOUNTER8_LO = 0xD090; - constexpr unsigned int mmGE_PERFCOUNTER8_SELECT 
= 0xD890; - constexpr unsigned int mmGE_PERFCOUNTER9_HI = 0xD093; - constexpr unsigned int mmGE_PERFCOUNTER9_LO = 0xD092; - constexpr unsigned int mmGE_PERFCOUNTER9_SELECT = 0xD892; - constexpr unsigned int mmGE_PERFCOUNTER10_HI = 0xD095; - constexpr unsigned int mmGE_PERFCOUNTER10_LO = 0xD094; - constexpr unsigned int mmGE_PERFCOUNTER10_SELECT = 0xD894; - constexpr unsigned int mmGE_PERFCOUNTER11_HI = 0xD097; - constexpr unsigned int mmGE_PERFCOUNTER11_LO = 0xD096; - constexpr unsigned int mmGE_PERFCOUNTER11_SELECT = 0xD896; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xCC40; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xCC41; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xCC3F; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xCC3E; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xCC42; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 
0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0CE2; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0x2440; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0x244F; - constexpr unsigned int mmSPI_CONFIG_CNTL_1_REMAP = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0x244E; - constexpr unsigned int mmSPI_CONFIG_CNTL_2_REMAP = 0xC442; - constexpr unsigned int mmSPI_CONFIG_CNTL_REMAP = 0xC440; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_4 = 0x24E0; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_5 = 0x24E1; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_6 = 0x24E2; - constexpr unsigned int mmSPI_CSQ_WF_ACTIVE_COUNT_7 = 0x24E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_8 = 0x31E4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_9 = 0x31E5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_10 = 0x31F0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_11 = 0x31F1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_12 = 0x31F4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_13 = 0x31F5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_14 = 0x31F6; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_15 = 0x31F7; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - 
constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_8 = 0x31EE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_9 = 0x31EF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_10 = 0x31F2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_11 = 0x31F3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_12 = 0x31F8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_13 = 0x31F9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_14 = 0x31FA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_15 = 0x31FB; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_ES = 0x2CCA; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_LS = 0x2D4A; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_ES = 0x2CCB; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_ES_GS = 0x2CBC; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_ES_VS = 0x2C7C; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_LS = 0x2D4B; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_LS_ES = 0x2CFD; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_LS_HS = 0x2D3D; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_LS_VS = 0x2C7D; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_ES = 0x2CC7; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_LS = 0x2D47; - constexpr unsigned int mmSPI_SHADER_PREF_PRI_CNTR_CTRL_ESGS = 0x2CB1; - constexpr unsigned int mmSPI_SHADER_PREF_PRI_CNTR_CTRL_LSHS = 0x2D31; - constexpr unsigned int mmSPI_SHADER_PREF_PRI_CNTR_CTRL_PS = 0x2C31; - constexpr unsigned int mmSPI_SHADER_PREF_PRI_CNTR_CTRL_VS = 0x2C71; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_0 = 0x2CCC; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_1 = 0x2CCD; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_2 = 0x2CCE; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_3 = 0x2CCF; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_4 = 0x2CD0; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_5 = 0x2CD1; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_6 
= 0x2CD2; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_7 = 0x2CD3; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_8 = 0x2CD4; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_9 = 0x2CD5; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_10 = 0x2CD6; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_11 = 0x2CD7; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_12 = 0x2CD8; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_13 = 0x2CD9; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_14 = 0x2CDA; - constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_15 = 0x2CDB; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_0 = 0x2D4C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_1 = 0x2D4D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_2 = 0x2D4E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_3 = 0x2D4F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_4 = 0x2D50; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_5 = 0x2D51; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_6 = 0x2D52; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_7 = 0x2D53; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_8 = 0x2D54; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_9 = 0x2D55; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_10 = 0x2D56; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_11 = 0x2D57; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_12 = 0x2D58; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_13 = 0x2D59; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_14 = 0x2D5A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_15 = 0x2D5B; - constexpr unsigned int mmSPI_SYS_COMPUTE = 0x2721; - constexpr unsigned int mmSPI_SYS_WIF_CNTL = 0x2722; - constexpr unsigned int mmSPI_USER_ACCUM_VMID_CNTL = 0x243F; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0x244D; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL_REMAP = 0xC443; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS4 = 0x31CD; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS5 
= 0x31CE; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS6 = 0x31CF; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS7 = 0x31D0; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_6 = 0x24B1; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_7 = 0x24B2; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_8 = 0x24B3; - constexpr unsigned int mmSPI_WF_LIFETIME_LIMIT_9 = 0x24B4; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_3 = 0x24B8; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_5 = 0x24BA; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_10 = 0x24BF; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_12 = 0x24C1; - constexpr unsigned int mmSQ_INTERRUPT_AUTO_MASK = 0x2314; - constexpr unsigned int mmSQ_INTERRUPT_MSG_CTRL = 0x2315; - constexpr unsigned int mmSQ_SHADER_TBA_HI = 0x231D; - constexpr unsigned int mmSQ_SHADER_TBA_LO = 0x231C; - constexpr unsigned int mmSQ_SHADER_TMA_HI = 0x231F; - constexpr unsigned int mmSQ_SHADER_TMA_LO = 0x231E; - constexpr unsigned int mmUMCCH0_PerfMonCtl1 = 0x14341; - constexpr unsigned int mmUMCCH0_PerfMonCtl2 = 0x14342; - constexpr unsigned int mmUMCCH0_PerfMonCtl3 = 0x14343; - constexpr unsigned int mmUMCCH0_PerfMonCtl4 = 0x14344; - constexpr unsigned int mmUMCCH0_PerfMonCtl5 = 0x14345; - constexpr unsigned int mmUMCCH0_PerfMonCtlClk = 0x14340; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int 
mmUMCCH0_PerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14B41; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14B42; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14B43; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14B44; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14B45; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14B40; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x14B4B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x14B4A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x14B4D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x14B4C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x14B4F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x14B4E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14B51; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14B50; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14B53; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14B52; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14B49; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14B48; - constexpr unsigned int mmUMCCH2_PerfMonCtl1 = 0x54341; - constexpr unsigned int mmUMCCH2_PerfMonCtl2 = 0x54342; - constexpr unsigned int mmUMCCH2_PerfMonCtl3 = 0x54343; - constexpr unsigned int mmUMCCH2_PerfMonCtl4 = 0x54344; - constexpr unsigned int mmUMCCH2_PerfMonCtl5 = 0x54345; - constexpr unsigned int mmUMCCH2_PerfMonCtlClk = 0x54340; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Hi = 0x5434B; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Lo = 0x5434A; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Hi = 0x5434D; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Lo = 0x5434C; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Hi = 0x5434F; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Lo = 0x5434E; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Hi = 0x54351; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Lo = 0x54350; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Hi = 0x54353; - 
constexpr unsigned int mmUMCCH2_PerfMonCtr5_Lo = 0x54352; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Hi = 0x54349; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Lo = 0x54348; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54B41; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54B42; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54B43; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54B44; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54B45; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54B40; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x54B4B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x54B4A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x54B4D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x54B4C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x54B4F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x54B4E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54B51; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54B50; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54B53; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54B52; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54B49; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54B48; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int 
mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94B41; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94B42; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94B43; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94B44; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94B45; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94B40; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x94B4B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x94B4A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x94B4D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x94B4C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x94B4F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x94B4E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94B51; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94B50; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94B53; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94B52; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94B49; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94B48; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - 
constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4B41; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4B42; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4B43; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4B44; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4B45; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4B40; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD4B4B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD4B4A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD4B4D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD4B4C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD4B4F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD4B4E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4B51; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4B50; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4B53; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4B52; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4B49; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4B48; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int 
mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114B41; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114B42; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114B43; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114B44; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114B45; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114B40; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x114B4B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x114B4A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x114B4D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x114B4C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x114B4F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x114B4E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114B51; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114B50; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114B53; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114B52; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114B49; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114B48; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int 
mmUMCCH10_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154B41; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154B42; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154B43; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154B44; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154B45; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x154B40; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x154B4B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x154B4A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x154B4D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x154B4C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x154B4F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x154B4E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154B51; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154B50; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154B53; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154B52; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154B49; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154B48; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 
0x194341; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0x194342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0x19434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0x194B41; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0x194B42; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0x194B43; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0x194B44; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0x194B45; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0x194B40; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0x194B4B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0x194B4A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0x194B4D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0x194B4C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0x194B4F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0x194B4E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0x194B51; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0x194B50; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0x194B53; - constexpr unsigned int 
mmUMCCH13_PerfMonCtr5_Lo = 0x194B52; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0x194B49; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0x194B48; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0x1D4343; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0x1D4348; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0x1D4B41; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0x1D4B42; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0x1D4B43; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0x1D4B44; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0x1D4B45; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0x1D4B40; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0x1D4B4B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0x1D4B4A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0x1D4B4D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0x1D4B4C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0x1D4B4F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0x1D4B4E; - 
constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 0x1D4B51; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0x1D4B50; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0x1D4B53; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0x1D4B52; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0x1D4B49; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0x1D4B48; - constexpr unsigned int mmVGT_CACHE_INVALIDATION = 0x2231; - constexpr unsigned int mmVGT_CNTL_STATUS = 0x223C; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0x2232; - constexpr unsigned int mmVGT_ESGS_RING_SIZE_UMD = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0x2233; - constexpr unsigned int mmVGT_GSVS_RING_SIZE_UMD = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0x226C; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM_UMD = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0x226E; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0x2278; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI_UMD = 0xC261; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_UMD = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0x2262; - constexpr unsigned int mmVGT_TF_RING_SIZE_UMD = 0xC24E; -} // namespace Gfx101 - -namespace Gfx103 -{ - constexpr unsigned int mmATC_PERFCOUNTER0_CFG = 0x0C10; - constexpr unsigned int mmATC_PERFCOUNTER1_CFG = 0x0C11; - constexpr unsigned int mmATC_PERFCOUNTER2_CFG = 0x0C12; - constexpr unsigned int mmATC_PERFCOUNTER3_CFG = 0x0C13; - constexpr unsigned int mmATC_PERFCOUNTER_HI = 0x0C16; - constexpr unsigned int mmATC_PERFCOUNTER_LO = 0x0C15; - constexpr unsigned int mmATC_PERFCOUNTER_RSLT_CNTL = 0x0C14; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0CE7; - constexpr unsigned int mmSQ_THREAD_TRACE_STATUS2 = 0x234F; -} // namespace Gfx103 - -namespace Gfx103CorePlus -{ - constexpr unsigned int mmGCEA_PERFCOUNTER0_CFG = 0xDA03; - constexpr unsigned int mmGCEA_PERFCOUNTER1_CFG = 0xDA04; - constexpr unsigned int mmGCEA_PERFCOUNTER_HI = 0xD263; - 
constexpr unsigned int mmGCEA_PERFCOUNTER_LO = 0xD262; - constexpr unsigned int mmGCEA_PERFCOUNTER_RSLT_CNTL = 0xDA05; - constexpr unsigned int mmGDS_PERFCOUNTER1_SELECT1 = 0xDA85; - constexpr unsigned int mmGDS_PERFCOUNTER2_SELECT1 = 0xDA86; - constexpr unsigned int mmGDS_PERFCOUNTER3_SELECT1 = 0xDA87; - constexpr unsigned int mmSDMA0_PERFCNT_MISC_CNTL = 0xDE23; - constexpr unsigned int mmSDMA0_PERFCNT_PERFCOUNTER0_CFG = 0xDE20; - constexpr unsigned int mmSDMA0_PERFCNT_PERFCOUNTER1_CFG = 0xDE21; - constexpr unsigned int mmSDMA0_PERFCNT_PERFCOUNTER_HI = 0xD661; - constexpr unsigned int mmSDMA0_PERFCNT_PERFCOUNTER_LO = 0xD660; - constexpr unsigned int mmSDMA0_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE22; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_HI = 0xD663; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_LO = 0xD662; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_SELECT = 0xDE24; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_SELECT1 = 0xDE25; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_HI = 0xD665; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_LO = 0xD664; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_SELECT = 0xDE26; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_SELECT1 = 0xDE27; - constexpr unsigned int mmSPI_CSG_PIPE_CONTROL = 0x243D; - constexpr unsigned int mmUMCCH0_PerfMonCtl1 = 0x14341; - constexpr unsigned int mmUMCCH0_PerfMonCtl2 = 0x14342; - constexpr unsigned int mmUMCCH0_PerfMonCtl3 = 0x14343; - constexpr unsigned int mmUMCCH0_PerfMonCtl4 = 0x14344; - constexpr unsigned int mmUMCCH0_PerfMonCtl5 = 0x14345; - constexpr unsigned int mmUMCCH0_PerfMonCtlClk = 0x14340; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int 
mmUMCCH0_PerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmUMCCH2_PerfMonCtl1 = 0x54341; - constexpr unsigned int mmUMCCH2_PerfMonCtl2 = 0x54342; - constexpr unsigned int mmUMCCH2_PerfMonCtl3 = 0x54343; - constexpr unsigned int mmUMCCH2_PerfMonCtl4 = 0x54344; - constexpr unsigned int mmUMCCH2_PerfMonCtl5 = 0x54345; - constexpr unsigned int mmUMCCH2_PerfMonCtlClk = 0x54340; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Hi = 0x5434B; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Lo = 0x5434A; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Hi = 0x5434D; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Lo = 0x5434C; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Hi = 0x5434F; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Lo = 0x5434E; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Hi = 0x54351; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Lo = 0x54350; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Hi = 0x54353; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Lo = 0x54352; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Hi = 0x54349; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Lo = 0x54348; -} // namespace Gfx103CorePlus - -namespace Gfx103Derivative -{ - constexpr unsigned int mmSPI_IND_DATA = 0xF053; - constexpr unsigned int mmSPI_IND_INDEX = 0xF052; - constexpr unsigned int mmSPI_USER_ACCUM_VMID_CNTL = 0x244B; - constexpr unsigned int mmSQ_SHADER_TBA_HI = 0x2313; - constexpr unsigned int mmSQ_SHADER_TBA_LO = 0x2312; - constexpr unsigned int mmSQ_SHADER_TMA_HI = 0x2315; - constexpr unsigned int mmSQ_SHADER_TMA_LO = 0x2314; - constexpr unsigned int mmVGT_CACHE_INVALIDATION = 0x2220; -} // namespace Gfx103Derivative - -namespace Gfx103Plus -{ - constexpr unsigned int 
mmDB_SPI_VRS_CENTER_LOCATION = 0xA018; - constexpr unsigned int mmGE_VRS_RATE = 0xC263; - constexpr unsigned int mmPA_CL_VRS_CNTL = 0xA212; - constexpr unsigned int mmSPI_BARYC_SSAA_CNTL = 0xA1B7; - constexpr unsigned int mmSPI_CS_CRAWLER_CONFIG = 0x24F7; - constexpr unsigned int mmSPI_GFX_CRAWLER_CONFIG = 0x24F6; - constexpr unsigned int mmSPI_WF_LIFETIME_STATUS_21 = 0x24CB; -} // namespace Gfx103Plus - -namespace Gfx103PlusExclusive -{ - constexpr unsigned int mmDB_RESERVED_REG_1 = 0xA016; - constexpr unsigned int mmDB_RESERVED_REG_2 = 0xA00F; - constexpr unsigned int mmDB_RESERVED_REG_3 = 0xA017; - constexpr unsigned int mmGE1_PERFCOUNTER0_HI = 0xD0A5; - constexpr unsigned int mmGE1_PERFCOUNTER0_LO = 0xD0A4; - constexpr unsigned int mmGE1_PERFCOUNTER0_SELECT = 0xD8A4; - constexpr unsigned int mmGE1_PERFCOUNTER0_SELECT1 = 0xD8A5; - constexpr unsigned int mmGE1_PERFCOUNTER1_HI = 0xD0A7; - constexpr unsigned int mmGE1_PERFCOUNTER1_LO = 0xD0A6; - constexpr unsigned int mmGE1_PERFCOUNTER1_SELECT = 0xD8A6; - constexpr unsigned int mmGE1_PERFCOUNTER1_SELECT1 = 0xD8A7; - constexpr unsigned int mmGE1_PERFCOUNTER2_HI = 0xD0A9; - constexpr unsigned int mmGE1_PERFCOUNTER2_LO = 0xD0A8; - constexpr unsigned int mmGE1_PERFCOUNTER2_SELECT = 0xD8A8; - constexpr unsigned int mmGE1_PERFCOUNTER2_SELECT1 = 0xD8A9; - constexpr unsigned int mmGE1_PERFCOUNTER3_HI = 0xD0AB; - constexpr unsigned int mmGE1_PERFCOUNTER3_LO = 0xD0AA; - constexpr unsigned int mmGE1_PERFCOUNTER3_SELECT = 0xD8AA; - constexpr unsigned int mmGE1_PERFCOUNTER3_SELECT1 = 0xD8AB; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER0_HI = 0xD0AD; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER0_LO = 0xD0AC; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER0_SELECT = 0xD8AC; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER0_SELECT1 = 0xD8AD; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER1_HI = 0xD0AF; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER1_LO = 0xD0AE; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER1_SELECT = 
0xD8AE; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER1_SELECT1 = 0xD8AF; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER2_HI = 0xD0B1; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER2_LO = 0xD0B0; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER2_SELECT = 0xD8B0; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER2_SELECT1 = 0xD8B1; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER3_HI = 0xD0B3; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER3_LO = 0xD0B2; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER3_SELECT = 0xD8B2; - constexpr unsigned int mmGE2_DIST_PERFCOUNTER3_SELECT1 = 0xD8B3; - constexpr unsigned int mmGE2_SE_PERFCOUNTER0_HI = 0xD0B5; - constexpr unsigned int mmGE2_SE_PERFCOUNTER0_LO = 0xD0B4; - constexpr unsigned int mmGE2_SE_PERFCOUNTER0_SELECT = 0xD8B4; - constexpr unsigned int mmGE2_SE_PERFCOUNTER0_SELECT1 = 0xD8B5; - constexpr unsigned int mmGE2_SE_PERFCOUNTER1_HI = 0xD0B7; - constexpr unsigned int mmGE2_SE_PERFCOUNTER1_LO = 0xD0B6; - constexpr unsigned int mmGE2_SE_PERFCOUNTER1_SELECT = 0xD8B6; - constexpr unsigned int mmGE2_SE_PERFCOUNTER1_SELECT1 = 0xD8B7; - constexpr unsigned int mmGE2_SE_PERFCOUNTER2_HI = 0xD0B9; - constexpr unsigned int mmGE2_SE_PERFCOUNTER2_LO = 0xD0B8; - constexpr unsigned int mmGE2_SE_PERFCOUNTER2_SELECT = 0xD8B8; - constexpr unsigned int mmGE2_SE_PERFCOUNTER2_SELECT1 = 0xD8B9; - constexpr unsigned int mmGE2_SE_PERFCOUNTER3_HI = 0xD0BB; - constexpr unsigned int mmGE2_SE_PERFCOUNTER3_LO = 0xD0BA; - constexpr unsigned int mmGE2_SE_PERFCOUNTER3_SELECT = 0xD8BA; - constexpr unsigned int mmGE2_SE_PERFCOUNTER3_SELECT1 = 0xD8BB; - constexpr unsigned int mmSPI_CONFIG_PS_CU_EN = 0x2452; - constexpr unsigned int mmSPI_EXP_THROTTLE_CTRL = 0x2723; - constexpr unsigned int mmSPI_WF_ACTIVE_COUNT_GFX = 0x24E8; - constexpr unsigned int mmSPI_WF_ACTIVE_COUNT_HPG = 0x24E9; - constexpr unsigned int mmSQ_CLK_CTRL = 0xF091; - constexpr unsigned int mmSQ_INTERRUPT_AUTO_MASK = 0x231E; - constexpr unsigned int mmSQ_INTERRUPT_MSG_CTRL = 0x231F; - 
constexpr unsigned int mmSX_PS_DOWNCONVERT_CONTROL = 0xA1D4; -} // namespace Gfx103PlusExclusive - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -namespace Gfx104Plus -{ - constexpr unsigned int mmGE_RATE_CNTL_1 = 0x2254; - constexpr unsigned int mmGE_RATE_CNTL_2 = 0x2255; - constexpr unsigned int mmPA_RATE_CNTL = 0xA188; - constexpr unsigned int mmPA_SC_BINNER_CNTL_2 = 0xA315; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA5; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA4; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0xC443; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF0_BASE = 0xD9E8; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF0_SIZE = 0xD9E9; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF1_BASE = 0xD9EA; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF1_SIZE = 0xD9EB; - constexpr unsigned int mmSQ_THREAD_TRACE_CTRL = 0xD9EC; - constexpr unsigned int mmSQ_THREAD_TRACE_DROPPED_CNTR = 0xD9FA; - constexpr unsigned int mmSQ_THREAD_TRACE_GFX_DRAW_CNTR = 0xD9F6; - constexpr unsigned int mmSQ_THREAD_TRACE_GFX_MARKER_CNTR = 0xD9F7; - constexpr unsigned int mmSQ_THREAD_TRACE_HP3D_DRAW_CNTR = 0xD9F8; - constexpr unsigned int mmSQ_THREAD_TRACE_HP3D_MARKER_CNTR = 0xD9F9; - constexpr unsigned int mmSQ_THREAD_TRACE_MASK = 0xD9ED; - constexpr unsigned int mmSQ_THREAD_TRACE_STATUS = 0xD9F4; - constexpr unsigned int mmSQ_THREAD_TRACE_STATUS2 = 0xD9F5; - constexpr unsigned int mmSQ_THREAD_TRACE_TOKEN_MASK = 0xD9EE; - constexpr unsigned int mmSQ_THREAD_TRACE_WPTR = 0xD9EF; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0xC261; -} // namespace Gfx104Plus -#endif - -namespace Gfx10Core -{ - constexpr unsigned int mmCB_COLOR0_CMASK_SLICE = 0xA320; - constexpr unsigned int mmCB_COLOR0_FMASK_SLICE = 0xA322; - constexpr unsigned int mmCB_COLOR0_PITCH = 0xA319; - constexpr unsigned int mmCB_COLOR0_SLICE = 0xA31A; - constexpr unsigned int mmCB_COLOR1_CMASK_SLICE = 0xA32F; - constexpr unsigned int mmCB_COLOR1_FMASK_SLICE 
= 0xA331; - constexpr unsigned int mmCB_COLOR1_PITCH = 0xA328; - constexpr unsigned int mmCB_COLOR1_SLICE = 0xA329; - constexpr unsigned int mmCB_COLOR2_CMASK_SLICE = 0xA33E; - constexpr unsigned int mmCB_COLOR2_FMASK_SLICE = 0xA340; - constexpr unsigned int mmCB_COLOR2_PITCH = 0xA337; - constexpr unsigned int mmCB_COLOR2_SLICE = 0xA338; - constexpr unsigned int mmCB_COLOR3_CMASK_SLICE = 0xA34D; - constexpr unsigned int mmCB_COLOR3_FMASK_SLICE = 0xA34F; - constexpr unsigned int mmCB_COLOR3_PITCH = 0xA346; - constexpr unsigned int mmCB_COLOR3_SLICE = 0xA347; - constexpr unsigned int mmCB_COLOR4_CMASK_SLICE = 0xA35C; - constexpr unsigned int mmCB_COLOR4_FMASK_SLICE = 0xA35E; - constexpr unsigned int mmCB_COLOR4_PITCH = 0xA355; - constexpr unsigned int mmCB_COLOR4_SLICE = 0xA356; - constexpr unsigned int mmCB_COLOR5_CMASK_SLICE = 0xA36B; - constexpr unsigned int mmCB_COLOR5_FMASK_SLICE = 0xA36D; - constexpr unsigned int mmCB_COLOR5_PITCH = 0xA364; - constexpr unsigned int mmCB_COLOR5_SLICE = 0xA365; - constexpr unsigned int mmCB_COLOR6_CMASK_SLICE = 0xA37A; - constexpr unsigned int mmCB_COLOR6_FMASK_SLICE = 0xA37C; - constexpr unsigned int mmCB_COLOR6_PITCH = 0xA373; - constexpr unsigned int mmCB_COLOR6_SLICE = 0xA374; - constexpr unsigned int mmCB_COLOR7_CMASK_SLICE = 0xA389; - constexpr unsigned int mmCB_COLOR7_FMASK_SLICE = 0xA38B; - constexpr unsigned int mmCB_COLOR7_PITCH = 0xA382; - constexpr unsigned int mmCB_COLOR7_SLICE = 0xA383; - constexpr unsigned int mmCB_RMI_BC_GL2_CACHE_CONTROL = 0x268A; - constexpr unsigned int mmCB_STUTTER_CONTROL_CMASK_RDLAT = 0x268B; - constexpr unsigned int mmCB_STUTTER_CONTROL_COLOR_RDLAT = 0x268D; - constexpr unsigned int mmCB_STUTTER_CONTROL_FMASK_RDLAT = 0x268C; - constexpr unsigned int mmDB_DFSM_CONFIG = 0x2630; - constexpr unsigned int mmDB_DFSM_CONTROL = 0xA00E; - constexpr unsigned int mmDB_DFSM_FLUSH_AUX_EVENT = 0x2636; - constexpr unsigned int mmDB_DFSM_FLUSH_ENABLE = 0x2635; - constexpr unsigned int 
mmDB_DFSM_PRIMS_IN_FLIGHT = 0x2633; - constexpr unsigned int mmDB_DFSM_TILES_IN_FLIGHT = 0x2632; - constexpr unsigned int mmDB_DFSM_WATCHDOG = 0x2634; - constexpr unsigned int mmDB_RMI_BC_GL2_CACHE_CONTROL = 0x261E; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_HI = 0xD04A; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_LO = 0xD049; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_SELECT = 0xD844; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_HI = 0xD04C; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_LO = 0xD04B; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_SELECT = 0xD845; - constexpr unsigned int mmPA_SC_ENHANCE_INTERNAL = 0x22DD; - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0CEB; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0CEC; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0CED; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0CEE; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0CEA; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0CE9; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0CEF; - constexpr unsigned int mmSQ_EDC_CNT = 0x23A6; - constexpr unsigned int mmSQ_EDC_FUE_CNTL = 0x23A7; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF0_BASE = 0x2340; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF0_SIZE = 0x2341; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF1_BASE = 0x2342; - constexpr unsigned int mmSQ_THREAD_TRACE_BUF1_SIZE = 0x2343; - constexpr unsigned int mmSQ_THREAD_TRACE_CTRL = 0x2347; - constexpr unsigned int mmSQ_THREAD_TRACE_DROPPED_CNTR = 0x2349; - constexpr unsigned int mmSQ_THREAD_TRACE_GFX_DRAW_CNTR = 0x234B; - constexpr unsigned int mmSQ_THREAD_TRACE_GFX_MARKER_CNTR = 0x234C; - constexpr unsigned int mmSQ_THREAD_TRACE_HP3D_DRAW_CNTR = 0x234D; - constexpr unsigned int mmSQ_THREAD_TRACE_HP3D_MARKER_CNTR = 0x234E; - constexpr unsigned int mmSQ_THREAD_TRACE_MASK = 0x2345; - constexpr unsigned int mmSQ_THREAD_TRACE_STATUS = 0x2348; - constexpr unsigned int mmSQ_THREAD_TRACE_TOKEN_MASK = 0x2346; - 
constexpr unsigned int mmSQ_THREAD_TRACE_WPTR = 0x2344; - constexpr unsigned int mmSQ_TIME_HI = 0x237C; - constexpr unsigned int mmSQ_TIME_LO = 0x237D; - constexpr unsigned int mmSQ_WREXEC_EXEC_HI = 0x23B1; - constexpr unsigned int mmSQ_WREXEC_EXEC_LO = 0x23B1; - constexpr unsigned int mmTA_POWER_CNTL = 0x2540; - constexpr unsigned int mmTA_RESERVED_010C = 0x2543; - constexpr unsigned int mmVGT_DISPATCH_DRAW_INDEX = 0xA2DD; -} // namespace Gfx10Core - -namespace Gfx10CorePlus -{ - constexpr unsigned int mmGCEA_PERFCOUNTER2_HI = 0xD261; - constexpr unsigned int mmGCEA_PERFCOUNTER2_LO = 0xD260; - constexpr unsigned int mmGCEA_PERFCOUNTER2_MODE = 0xDA02; - constexpr unsigned int mmGCEA_PERFCOUNTER2_SELECT = 0xDA00; - constexpr unsigned int mmGCEA_PERFCOUNTER2_SELECT1 = 0xDA01; - constexpr unsigned int mmIA_UTCL1_STATUS_2 = 0x2237; -} // namespace Gfx10CorePlus - -namespace Gfx10Plus -{ - constexpr unsigned int mmCB_CACHE_EVICT_POINTS = 0x268E; - constexpr unsigned int mmCB_COLOR0_ATTRIB2 = 0xA3B0; - constexpr unsigned int mmCB_COLOR0_ATTRIB3 = 0xA3B8; - constexpr unsigned int mmCB_COLOR0_BASE_EXT = 0xA390; - constexpr unsigned int mmCB_COLOR0_DCC_BASE_EXT = 0xA3A8; - constexpr unsigned int mmCB_COLOR1_ATTRIB2 = 0xA3B1; - constexpr unsigned int mmCB_COLOR1_ATTRIB3 = 0xA3B9; - constexpr unsigned int mmCB_COLOR1_BASE_EXT = 0xA391; - constexpr unsigned int mmCB_COLOR1_DCC_BASE_EXT = 0xA3A9; - constexpr unsigned int mmCB_COLOR2_ATTRIB2 = 0xA3B2; - constexpr unsigned int mmCB_COLOR2_ATTRIB3 = 0xA3BA; - constexpr unsigned int mmCB_COLOR2_BASE_EXT = 0xA392; - constexpr unsigned int mmCB_COLOR2_DCC_BASE_EXT = 0xA3AA; - constexpr unsigned int mmCB_COLOR3_ATTRIB2 = 0xA3B3; - constexpr unsigned int mmCB_COLOR3_ATTRIB3 = 0xA3BB; - constexpr unsigned int mmCB_COLOR3_BASE_EXT = 0xA393; - constexpr unsigned int mmCB_COLOR3_DCC_BASE_EXT = 0xA3AB; - constexpr unsigned int mmCB_COLOR4_ATTRIB2 = 0xA3B4; - constexpr unsigned int mmCB_COLOR4_ATTRIB3 = 0xA3BC; - constexpr unsigned int 
mmCB_COLOR4_BASE_EXT = 0xA394; - constexpr unsigned int mmCB_COLOR4_DCC_BASE_EXT = 0xA3AC; - constexpr unsigned int mmCB_COLOR5_ATTRIB2 = 0xA3B5; - constexpr unsigned int mmCB_COLOR5_ATTRIB3 = 0xA3BD; - constexpr unsigned int mmCB_COLOR5_BASE_EXT = 0xA395; - constexpr unsigned int mmCB_COLOR5_DCC_BASE_EXT = 0xA3AD; - constexpr unsigned int mmCB_COLOR6_ATTRIB2 = 0xA3B6; - constexpr unsigned int mmCB_COLOR6_ATTRIB3 = 0xA3BE; - constexpr unsigned int mmCB_COLOR6_BASE_EXT = 0xA396; - constexpr unsigned int mmCB_COLOR6_DCC_BASE_EXT = 0xA3AE; - constexpr unsigned int mmCB_COLOR7_ATTRIB2 = 0xA3B7; - constexpr unsigned int mmCB_COLOR7_ATTRIB3 = 0xA3BF; - constexpr unsigned int mmCB_COLOR7_BASE_EXT = 0xA397; - constexpr unsigned int mmCB_COLOR7_DCC_BASE_EXT = 0xA3AF; - constexpr unsigned int mmCB_COVERAGE_OUT_CONTROL = 0xA10A; - constexpr unsigned int mmCB_DCC_CONFIG = 0x2687; - constexpr unsigned int mmCB_HW_CONTROL = 0x2684; - constexpr unsigned int mmCB_HW_CONTROL_1 = 0x2685; - constexpr unsigned int mmCB_HW_CONTROL_2 = 0x2686; - constexpr unsigned int mmCB_HW_CONTROL_4 = 0x2682; - constexpr unsigned int mmCB_HW_MEM_ARBITER_RD = 0x2688; - constexpr unsigned int mmCB_HW_MEM_ARBITER_WR = 0x2689; - constexpr unsigned int mmCB_RMI_GL2_CACHE_CONTROL = 0xA104; - constexpr unsigned int mmCHA_PERFCOUNTER0_HI = 0xD601; - constexpr unsigned int mmCHA_PERFCOUNTER0_LO = 0xD600; - constexpr unsigned int mmCHA_PERFCOUNTER0_SELECT = 0xDDE0; - constexpr unsigned int mmCHA_PERFCOUNTER0_SELECT1 = 0xDDE1; - constexpr unsigned int mmCHA_PERFCOUNTER1_HI = 0xD603; - constexpr unsigned int mmCHA_PERFCOUNTER1_LO = 0xD602; - constexpr unsigned int mmCHA_PERFCOUNTER1_SELECT = 0xDDE2; - constexpr unsigned int mmCHA_PERFCOUNTER2_HI = 0xD605; - constexpr unsigned int mmCHA_PERFCOUNTER2_LO = 0xD604; - constexpr unsigned int mmCHA_PERFCOUNTER2_SELECT = 0xDDE3; - constexpr unsigned int mmCHA_PERFCOUNTER3_HI = 0xD607; - constexpr unsigned int mmCHA_PERFCOUNTER3_LO = 0xD606; - constexpr unsigned int 
mmCHA_PERFCOUNTER3_SELECT = 0xDDE4; - constexpr unsigned int mmCHC_PERFCOUNTER0_HI = 0xD3C1; - constexpr unsigned int mmCHC_PERFCOUNTER0_LO = 0xD3C0; - constexpr unsigned int mmCHC_PERFCOUNTER0_SELECT = 0xDBC0; - constexpr unsigned int mmCHC_PERFCOUNTER0_SELECT1 = 0xDBC1; - constexpr unsigned int mmCHC_PERFCOUNTER1_HI = 0xD3C3; - constexpr unsigned int mmCHC_PERFCOUNTER1_LO = 0xD3C2; - constexpr unsigned int mmCHC_PERFCOUNTER1_SELECT = 0xDBC2; - constexpr unsigned int mmCHC_PERFCOUNTER2_HI = 0xD3C5; - constexpr unsigned int mmCHC_PERFCOUNTER2_LO = 0xD3C4; - constexpr unsigned int mmCHC_PERFCOUNTER2_SELECT = 0xDBC3; - constexpr unsigned int mmCHC_PERFCOUNTER3_HI = 0xD3C7; - constexpr unsigned int mmCHC_PERFCOUNTER3_LO = 0xD3C6; - constexpr unsigned int mmCHC_PERFCOUNTER3_SELECT = 0xDBC4; - constexpr unsigned int mmCOMPUTE_DDID_INDEX = 0x2E29; - constexpr unsigned int mmCOMPUTE_DESTINATION_EN_SE0 = 0x2E16; - constexpr unsigned int mmCOMPUTE_DESTINATION_EN_SE1 = 0x2E17; - constexpr unsigned int mmCOMPUTE_DESTINATION_EN_SE2 = 0x2E19; - constexpr unsigned int mmCOMPUTE_DESTINATION_EN_SE3 = 0x2E1A; - constexpr unsigned int mmCOMPUTE_DISPATCH_TUNNEL = 0x2E7D; - constexpr unsigned int mmCOMPUTE_PGM_RSRC3 = 0x2E28; - constexpr unsigned int mmCOMPUTE_REQ_CTRL = 0x2E22; - constexpr unsigned int mmCOMPUTE_SHADER_CHKSUM = 0x2E2A; - constexpr unsigned int mmCOMPUTE_USER_ACCUM_0 = 0x2E24; - constexpr unsigned int mmCOMPUTE_USER_ACCUM_1 = 0x2E25; - constexpr unsigned int mmCOMPUTE_USER_ACCUM_2 = 0x2E26; - constexpr unsigned int mmCOMPUTE_USER_ACCUM_3 = 0x2E27; - constexpr unsigned int mmDB_DEPTH_SIZE_XY = 0xA007; - constexpr unsigned int mmDB_EQUAD_STUTTER_CONTROL = 0x2612; - constexpr unsigned int mmDB_ETILE_STUTTER_CONTROL = 0x2610; - constexpr unsigned int mmDB_EXCEPTION_CONTROL = 0x261F; - constexpr unsigned int mmDB_FGCG_INTERFACES_CLK_CTRL = 0x2638; - constexpr unsigned int mmDB_FGCG_SRAMS_CLK_CTRL = 0x2637; - constexpr unsigned int mmDB_FIFO_DEPTH3 = 0x261D; - constexpr 
unsigned int mmDB_HTILE_DATA_BASE_HI = 0xA01E; - constexpr unsigned int mmDB_LAST_OF_BURST_CONFIG = 0x261A; - constexpr unsigned int mmDB_LQUAD_STUTTER_CONTROL = 0x2613; - constexpr unsigned int mmDB_LTILE_STUTTER_CONTROL = 0x2611; - constexpr unsigned int mmDB_RMI_L2_CACHE_CONTROL = 0xA01F; - constexpr unsigned int mmDB_STENCIL_INFO = 0xA011; - constexpr unsigned int mmDB_STENCIL_READ_BASE = 0xA013; - constexpr unsigned int mmDB_STENCIL_READ_BASE_HI = 0xA01B; - constexpr unsigned int mmDB_STENCIL_WRITE_BASE = 0xA015; - constexpr unsigned int mmDB_STENCIL_WRITE_BASE_HI = 0xA01D; - constexpr unsigned int mmDB_Z_INFO = 0xA010; - constexpr unsigned int mmDB_Z_READ_BASE = 0xA012; - constexpr unsigned int mmDB_Z_READ_BASE_HI = 0xA01A; - constexpr unsigned int mmDB_Z_WRITE_BASE_HI = 0xA01C; - constexpr unsigned int mmGCR_PERFCOUNTER0_HI = 0xD521; - constexpr unsigned int mmGCR_PERFCOUNTER0_LO = 0xD520; - constexpr unsigned int mmGCR_PERFCOUNTER0_SELECT = 0xDD60; - constexpr unsigned int mmGCR_PERFCOUNTER0_SELECT1 = 0xDD61; - constexpr unsigned int mmGCR_PERFCOUNTER1_HI = 0xD523; - constexpr unsigned int mmGCR_PERFCOUNTER1_LO = 0xD522; - constexpr unsigned int mmGCR_PERFCOUNTER1_SELECT = 0xDD62; - constexpr unsigned int mmGE_CNTL = 0xC25B; - constexpr unsigned int mmGE_INDX_OFFSET = 0xC24A; - constexpr unsigned int mmGE_MAX_OUTPUT_PER_SUBGROUP = 0xA1FF; - constexpr unsigned int mmGE_MAX_VTX_INDX = 0xC259; - constexpr unsigned int mmGE_MIN_VTX_INDX = 0xC249; - constexpr unsigned int mmGE_MULTI_PRIM_IB_RESET_EN = 0xC24B; - constexpr unsigned int mmGE_NGG_SUBGRP_CNTL = 0xA2D3; - constexpr unsigned int mmGE_PC_ALLOC = 0xC260; - constexpr unsigned int mmGE_PRIV_CONTROL = 0x2264; - constexpr unsigned int mmGE_STATUS = 0x2265; - constexpr unsigned int mmGE_STEREO_CNTL = 0xC25F; - constexpr unsigned int mmGE_USER_VGPR1 = 0xC25C; - constexpr unsigned int mmGE_USER_VGPR2 = 0xC25D; - constexpr unsigned int mmGE_USER_VGPR3 = 0xC25E; - constexpr unsigned int mmGE_USER_VGPR_EN = 
0xC262; - constexpr unsigned int mmGL1A_PERFCOUNTER0_HI = 0xD5C1; - constexpr unsigned int mmGL1A_PERFCOUNTER0_LO = 0xD5C0; - constexpr unsigned int mmGL1A_PERFCOUNTER0_SELECT = 0xDDC0; - constexpr unsigned int mmGL1A_PERFCOUNTER0_SELECT1 = 0xDDC1; - constexpr unsigned int mmGL1A_PERFCOUNTER1_HI = 0xD5C3; - constexpr unsigned int mmGL1A_PERFCOUNTER1_LO = 0xD5C2; - constexpr unsigned int mmGL1A_PERFCOUNTER1_SELECT = 0xDDC2; - constexpr unsigned int mmGL1A_PERFCOUNTER2_HI = 0xD5C5; - constexpr unsigned int mmGL1A_PERFCOUNTER2_LO = 0xD5C4; - constexpr unsigned int mmGL1A_PERFCOUNTER2_SELECT = 0xDDC3; - constexpr unsigned int mmGL1A_PERFCOUNTER3_HI = 0xD5C7; - constexpr unsigned int mmGL1A_PERFCOUNTER3_LO = 0xD5C6; - constexpr unsigned int mmGL1A_PERFCOUNTER3_SELECT = 0xDDC4; - constexpr unsigned int mmGL1C_PERFCOUNTER0_HI = 0xD3A1; - constexpr unsigned int mmGL1C_PERFCOUNTER0_LO = 0xD3A0; - constexpr unsigned int mmGL1C_PERFCOUNTER0_SELECT = 0xDBA0; - constexpr unsigned int mmGL1C_PERFCOUNTER0_SELECT1 = 0xDBA1; - constexpr unsigned int mmGL1C_PERFCOUNTER1_HI = 0xD3A3; - constexpr unsigned int mmGL1C_PERFCOUNTER1_LO = 0xD3A2; - constexpr unsigned int mmGL1C_PERFCOUNTER1_SELECT = 0xDBA2; - constexpr unsigned int mmGL1C_PERFCOUNTER2_HI = 0xD3A5; - constexpr unsigned int mmGL1C_PERFCOUNTER2_LO = 0xD3A4; - constexpr unsigned int mmGL1C_PERFCOUNTER2_SELECT = 0xDBA3; - constexpr unsigned int mmGL1C_PERFCOUNTER3_HI = 0xD3A7; - constexpr unsigned int mmGL1C_PERFCOUNTER3_LO = 0xD3A6; - constexpr unsigned int mmGL1C_PERFCOUNTER3_SELECT = 0xDBA4; - constexpr unsigned int mmGL2A_PERFCOUNTER0_HI = 0xD391; - constexpr unsigned int mmGL2A_PERFCOUNTER0_LO = 0xD390; - constexpr unsigned int mmGL2A_PERFCOUNTER0_SELECT = 0xDB90; - constexpr unsigned int mmGL2A_PERFCOUNTER0_SELECT1 = 0xDB91; - constexpr unsigned int mmGL2A_PERFCOUNTER1_HI = 0xD393; - constexpr unsigned int mmGL2A_PERFCOUNTER1_LO = 0xD392; - constexpr unsigned int mmGL2A_PERFCOUNTER1_SELECT = 0xDB92; - constexpr unsigned 
int mmGL2A_PERFCOUNTER1_SELECT1 = 0xDB93; - constexpr unsigned int mmGL2A_PERFCOUNTER2_HI = 0xD395; - constexpr unsigned int mmGL2A_PERFCOUNTER2_LO = 0xD394; - constexpr unsigned int mmGL2A_PERFCOUNTER2_SELECT = 0xDB94; - constexpr unsigned int mmGL2A_PERFCOUNTER3_HI = 0xD397; - constexpr unsigned int mmGL2A_PERFCOUNTER3_LO = 0xD396; - constexpr unsigned int mmGL2A_PERFCOUNTER3_SELECT = 0xDB95; - constexpr unsigned int mmGL2C_PERFCOUNTER0_HI = 0xD381; - constexpr unsigned int mmGL2C_PERFCOUNTER0_LO = 0xD380; - constexpr unsigned int mmGL2C_PERFCOUNTER0_SELECT = 0xDB80; - constexpr unsigned int mmGL2C_PERFCOUNTER0_SELECT1 = 0xDB81; - constexpr unsigned int mmGL2C_PERFCOUNTER1_HI = 0xD383; - constexpr unsigned int mmGL2C_PERFCOUNTER1_LO = 0xD382; - constexpr unsigned int mmGL2C_PERFCOUNTER1_SELECT = 0xDB82; - constexpr unsigned int mmGL2C_PERFCOUNTER1_SELECT1 = 0xDB83; - constexpr unsigned int mmGL2C_PERFCOUNTER2_HI = 0xD385; - constexpr unsigned int mmGL2C_PERFCOUNTER2_LO = 0xD384; - constexpr unsigned int mmGL2C_PERFCOUNTER2_SELECT = 0xDB84; - constexpr unsigned int mmGL2C_PERFCOUNTER3_HI = 0xD387; - constexpr unsigned int mmGL2C_PERFCOUNTER3_LO = 0xD386; - constexpr unsigned int mmGL2C_PERFCOUNTER3_SELECT = 0xDB85; - constexpr unsigned int mmGRBM_PERFCOUNTER0_SELECT_HI = 0xD84D; - constexpr unsigned int mmGRBM_PERFCOUNTER1_SELECT_HI = 0xD84E; - constexpr unsigned int mmPA_PH_PERFCOUNTER0_HI = 0xD581; - constexpr unsigned int mmPA_PH_PERFCOUNTER0_LO = 0xD580; - constexpr unsigned int mmPA_PH_PERFCOUNTER0_SELECT = 0xDD80; - constexpr unsigned int mmPA_PH_PERFCOUNTER0_SELECT1 = 0xDD81; - constexpr unsigned int mmPA_PH_PERFCOUNTER1_HI = 0xD583; - constexpr unsigned int mmPA_PH_PERFCOUNTER1_LO = 0xD582; - constexpr unsigned int mmPA_PH_PERFCOUNTER1_SELECT = 0xDD82; - constexpr unsigned int mmPA_PH_PERFCOUNTER1_SELECT1 = 0xDD90; - constexpr unsigned int mmPA_PH_PERFCOUNTER2_HI = 0xD585; - constexpr unsigned int mmPA_PH_PERFCOUNTER2_LO = 0xD584; - constexpr unsigned int 
mmPA_PH_PERFCOUNTER2_SELECT = 0xDD83; - constexpr unsigned int mmPA_PH_PERFCOUNTER2_SELECT1 = 0xDD91; - constexpr unsigned int mmPA_PH_PERFCOUNTER3_HI = 0xD587; - constexpr unsigned int mmPA_PH_PERFCOUNTER3_LO = 0xD586; - constexpr unsigned int mmPA_PH_PERFCOUNTER3_SELECT = 0xDD84; - constexpr unsigned int mmPA_PH_PERFCOUNTER3_SELECT1 = 0xDD92; - constexpr unsigned int mmPA_PH_PERFCOUNTER4_HI = 0xD589; - constexpr unsigned int mmPA_PH_PERFCOUNTER4_LO = 0xD588; - constexpr unsigned int mmPA_PH_PERFCOUNTER4_SELECT = 0xDD85; - constexpr unsigned int mmPA_PH_PERFCOUNTER5_HI = 0xD58B; - constexpr unsigned int mmPA_PH_PERFCOUNTER5_LO = 0xD58A; - constexpr unsigned int mmPA_PH_PERFCOUNTER5_SELECT = 0xDD86; - constexpr unsigned int mmPA_PH_PERFCOUNTER6_HI = 0xD58D; - constexpr unsigned int mmPA_PH_PERFCOUNTER6_LO = 0xD58C; - constexpr unsigned int mmPA_PH_PERFCOUNTER6_SELECT = 0xDD87; - constexpr unsigned int mmPA_PH_PERFCOUNTER7_HI = 0xD58F; - constexpr unsigned int mmPA_PH_PERFCOUNTER7_LO = 0xD58E; - constexpr unsigned int mmPA_PH_PERFCOUNTER7_SELECT = 0xDD88; - constexpr unsigned int mmPA_STATE_STEREO_X = 0xA211; - constexpr unsigned int mmPA_STEREO_CNTL = 0xA210; - constexpr unsigned int mmPA_SU_PERFCOUNTER2_SELECT1 = 0xD905; - constexpr unsigned int mmPA_SU_PERFCOUNTER3_SELECT = 0xD906; - constexpr unsigned int mmPA_SU_PERFCOUNTER3_SELECT1 = 0xD907; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRL = 0xDC9A; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_WRCOUNT = 0xDC9E; - constexpr unsigned int mmRLC_SPM_ACCUM_MODE = 0xDC9B; - constexpr unsigned int mmRLC_SPM_ACCUM_SAMPLES_REQUESTED = 0xDC9D; - constexpr unsigned int mmRLC_SPM_ACCUM_STATUS = 0xDC99; - constexpr unsigned int mmRLC_SPM_ACCUM_THRESHOLD = 0xDC9C; - constexpr unsigned int mmRLC_SPM_RING_RDPTR = 0xDC85; - constexpr unsigned int mmRLC_SPM_SEGMENT_THRESHOLD = 0xDC86; - constexpr unsigned int mmRLC_SPM_THREAD_TRACE_CTRL = 0xEDE6; - constexpr unsigned int mmSPI_LB_DATA_PERWGP_WAVE_CS = 0x24E7; - constexpr 
unsigned int mmSPI_LB_DATA_PERWGP_WAVE_HSGS = 0x24E5; - constexpr unsigned int mmSPI_LB_WGP_MASK = 0x24D5; - constexpr unsigned int mmSPI_PG_ENABLE_STATIC_WGP_MASK = 0x24D7; - constexpr unsigned int mmSPI_PQEV_CTRL = 0x2720; - constexpr unsigned int mmSPI_SHADER_IDX_FORMAT = 0xA1C2; - constexpr unsigned int mmSPI_SHADER_PGM_HI_ES = 0x2CC9; - constexpr unsigned int mmSPI_SHADER_PGM_HI_ES_GS = 0x2C85; - constexpr unsigned int mmSPI_SHADER_PGM_HI_LS = 0x2D49; - constexpr unsigned int mmSPI_SHADER_PGM_HI_LS_HS = 0x2D05; - constexpr unsigned int mmSPI_SHADER_PGM_LO_ES = 0x2CC8; - constexpr unsigned int mmSPI_SHADER_PGM_LO_ES_GS = 0x2C84; - constexpr unsigned int mmSPI_SHADER_PGM_LO_LS = 0x2D48; - constexpr unsigned int mmSPI_SHADER_PGM_LO_LS_HS = 0x2D04; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC4_PS = 0x2C01; - constexpr unsigned int mmSPI_SHADER_REQ_CTRL_ESGS = 0x2CB0; - constexpr unsigned int mmSPI_SHADER_REQ_CTRL_LSHS = 0x2D30; - constexpr unsigned int mmSPI_SHADER_REQ_CTRL_PS = 0x2C30; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_ESGS_0 = 0x2CB2; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_ESGS_1 = 0x2CB3; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_ESGS_2 = 0x2CB4; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_ESGS_3 = 0x2CB5; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_LSHS_0 = 0x2D32; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_LSHS_1 = 0x2D33; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_LSHS_2 = 0x2D34; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_LSHS_3 = 0x2D35; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_PS_0 = 0x2C32; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_PS_1 = 0x2C33; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_PS_2 = 0x2C34; - constexpr unsigned int mmSPI_SHADER_USER_ACCUM_PS_3 = 0x2C35; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_0 = 0x2C8C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_1 = 0x2C8D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_2 = 0x2C8E; - constexpr unsigned 
int mmSPI_SHADER_USER_DATA_GS_3 = 0x2C8F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_4 = 0x2C90; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_5 = 0x2C91; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_6 = 0x2C92; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_7 = 0x2C93; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_8 = 0x2C94; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_9 = 0x2C95; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_10 = 0x2C96; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_11 = 0x2C97; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_12 = 0x2C98; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_13 = 0x2C99; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_14 = 0x2C9A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_15 = 0x2C9B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_16 = 0x2C9C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_17 = 0x2C9D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_18 = 0x2C9E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_19 = 0x2C9F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_20 = 0x2CA0; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_21 = 0x2CA1; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_22 = 0x2CA2; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_23 = 0x2CA3; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_24 = 0x2CA4; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_25 = 0x2CA5; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_26 = 0x2CA6; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_27 = 0x2CA7; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_28 = 0x2CA8; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_29 = 0x2CA9; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_30 = 0x2CAA; - constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_31 = 0x2CAB; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_0 = 0x2D0C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_1 = 0x2D0D; - constexpr unsigned int 
mmSPI_SHADER_USER_DATA_HS_2 = 0x2D0E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_3 = 0x2D0F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_4 = 0x2D10; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_5 = 0x2D11; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_6 = 0x2D12; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_7 = 0x2D13; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_8 = 0x2D14; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_9 = 0x2D15; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_10 = 0x2D16; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_11 = 0x2D17; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_12 = 0x2D18; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_13 = 0x2D19; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_14 = 0x2D1A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_15 = 0x2D1B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_16 = 0x2D1C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_17 = 0x2D1D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_18 = 0x2D1E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_19 = 0x2D1F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_20 = 0x2D20; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_21 = 0x2D21; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_22 = 0x2D22; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_23 = 0x2D23; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_24 = 0x2D24; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_25 = 0x2D25; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_26 = 0x2D26; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_27 = 0x2D27; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_28 = 0x2D28; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_29 = 0x2D29; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_30 = 0x2D2A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_31 = 0x2D2B; - constexpr unsigned int mmSQG_CONFIG = 0x231A; - constexpr unsigned int mmSQG_STATUS = 0x2304; - constexpr 
unsigned int mmSQ_ARB_CONFIG = 0x230C; - constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_4 = 0xC344; - constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_5 = 0xC345; - constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_6 = 0xC346; - constexpr unsigned int mmSQ_THREAD_TRACE_USERDATA_7 = 0xC347; - constexpr unsigned int mmSQ_WATCH0_ADDR_H = 0x2330; - constexpr unsigned int mmSQ_WATCH0_ADDR_L = 0x2331; - constexpr unsigned int mmSQ_WATCH0_CNTL = 0x2332; - constexpr unsigned int mmSQ_WATCH1_ADDR_H = 0x2333; - constexpr unsigned int mmSQ_WATCH1_ADDR_L = 0x2334; - constexpr unsigned int mmSQ_WATCH1_CNTL = 0x2335; - constexpr unsigned int mmSQ_WATCH2_ADDR_H = 0x2336; - constexpr unsigned int mmSQ_WATCH2_ADDR_L = 0x2337; - constexpr unsigned int mmSQ_WATCH2_CNTL = 0x2338; - constexpr unsigned int mmSQ_WATCH3_ADDR_H = 0x2339; - constexpr unsigned int mmSQ_WATCH3_ADDR_L = 0x233A; - constexpr unsigned int mmSQ_WATCH3_CNTL = 0x233B; -} // namespace Gfx10Plus - -namespace Gfx10Vrs -{ - constexpr unsigned int mmCB_CGTT_SCLK_CTRL1 = 0xF0A9; - constexpr unsigned int mmDB_VRS_OVERRIDE_CNTL = 0xA019; - constexpr unsigned int mmPA_SC_ENHANCE_3 = 0x22E5; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_ADDR_OFFSET = 0xDCAB; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_32BITCNTRS_REGIONS = 0xDCAD; - constexpr unsigned int mmRLC_SPM_ACCUM_SWA_DATARAM_ADDR = 0xDCA9; - constexpr unsigned int mmRLC_SPM_ACCUM_SWA_DATARAM_DATA = 0xDCAA; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_ADDR_OFFSET = 0xDCA7; - constexpr unsigned int mmRLC_SPM_PERFMON_SWA_GLB_SEGMENT_SIZE = 0xDCAC; - constexpr unsigned int mmRLC_SPM_PERFMON_SWA_SE3TO0_SEGMENT_SIZE = 0xDCA6; - constexpr unsigned int mmRLC_SPM_PERFMON_SWA_SEGMENT_SIZE = 0xDCA2; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_ADDR_OFFSET = 0xDCA8; -} // namespace Gfx10Vrs - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -namespace Gfx11 -{ - constexpr unsigned int mmCB_DCC_CONFIG2 = 0x268B; - constexpr unsigned int 
mmCB_FDCC_CONTROL = 0xA109; - constexpr unsigned int mmCB_FGCG_SRAM_OVERRIDE = 0x268A; - constexpr unsigned int mmCB_KEY_OVERRIDE_0 = 0x267A; - constexpr unsigned int mmCB_KEY_OVERRIDE_1 = 0x267B; - constexpr unsigned int mmCB_KEY_OVERRIDE_2 = 0x267C; - constexpr unsigned int mmCB_KEY_OVERRIDE_3 = 0x267D; - constexpr unsigned int mmCB_KEY_OVERRIDE_4 = 0x267E; - constexpr unsigned int mmCB_KEY_OVERRIDE_5 = 0x267F; - constexpr unsigned int mmCB_KEY_OVERRIDE_6 = 0x2680; - constexpr unsigned int mmCB_KEY_OVERRIDE_7 = 0x2681; - constexpr unsigned int mmCOMPUTE_DISPATCH_INTERLEAVE = 0x2E2F; - constexpr unsigned int mmCOMPUTE_RELAUNCH = 0x2E30; - constexpr unsigned int mmCOMPUTE_RELAUNCH2 = 0x2E33; - constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE4 = 0x2E2B; - constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE5 = 0x2E2C; - constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE6 = 0x2E2D; - constexpr unsigned int mmCOMPUTE_STATIC_THREAD_MGMT_SE7 = 0x2E2E; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_HI = 0x2E32; - constexpr unsigned int mmCOMPUTE_WAVE_RESTORE_ADDR_LO = 0x2E31; - constexpr unsigned int mmCP_VGT_ASINVOC_COUNT_HI = 0xC033; - constexpr unsigned int mmCP_VGT_ASINVOC_COUNT_LO = 0xC032; - constexpr unsigned int mmDB_FIFO_DEPTH4 = 0x2639; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER0_CFG = 0xDD30; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER1_CFG = 0xDD31; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER2_CFG = 0xDD32; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER3_CFG = 0xDD33; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER4_CFG = 0xDD34; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER5_CFG = 0xDD35; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER6_CFG = 0xDD36; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER7_CFG = 0xDD37; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_HI = 0xD4E5; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_LO = 0xD4E4; - constexpr unsigned int mmGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL = 0xDD38; - 
constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_HI = 0xD4E2; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_LO = 0xD4E0; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_MODE = 0xDD24; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_SELECT = 0xDD20; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_0_SELECT1 = 0xDD22; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_HI = 0xD4E3; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_LO = 0xD4E1; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_MODE = 0xDD25; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_SELECT = 0xDD21; - constexpr unsigned int mmGCVML2_PERFCOUNTER2_1_SELECT1 = 0xDD23; - constexpr unsigned int mmGE_GS_FAST_LAUNCH_WG_DIM = 0xC264; - constexpr unsigned int mmGE_GS_FAST_LAUNCH_WG_DIM_1 = 0xC265; - constexpr unsigned int mmGE_PA_IF_SAFE_REG = 0x2279; - constexpr unsigned int mmGE_SPI_IF_SAFE_REG = 0x2278; - constexpr unsigned int mmPA_PH_ENHANCE = 0xA95F; - constexpr unsigned int mmPA_PH_INTERFACE_FIFO_SIZE = 0xA95E; - constexpr unsigned int mmPA_SC_ATM_CNTL = 0xA94D; - constexpr unsigned int mmPA_SC_BINNER_CNTL_OVERRIDE = 0xA946; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_0 = 0xA950; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_1 = 0xA951; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_2 = 0xA952; - constexpr unsigned int mmPA_SC_BINNER_EVENT_CNTL_3 = 0xA953; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_0 = 0xA955; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_1 = 0xA956; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_2 = 0xA957; - constexpr unsigned int mmPA_SC_BINNER_PERF_CNTL_3 = 0xA958; - constexpr unsigned int mmPA_SC_BINNER_TIMEOUT_COUNTER = 0xA954; - constexpr unsigned int mmPA_SC_DSM_CNTL = 0xA948; - constexpr unsigned int mmPA_SC_ENHANCE = 0xA941; - constexpr unsigned int mmPA_SC_ENHANCE_1 = 0xA942; - constexpr unsigned int mmPA_SC_ENHANCE_2 = 0xA943; - constexpr unsigned int mmPA_SC_ENHANCE_3 = 0xA944; - constexpr unsigned int mmPA_SC_FIFO_SIZE = 0xA94A; - constexpr 
unsigned int mmPA_SC_FORCE_EOV_MAX_CNTS = 0xA94F; - constexpr unsigned int mmPA_SC_HP3D_TRAP_SCREEN_HV_LOCK = 0xA95C; - constexpr unsigned int mmPA_SC_IF_FIFO_SIZE = 0xA94B; - constexpr unsigned int mmPA_SC_P3D_TRAP_SCREEN_HV_LOCK = 0xA95B; - constexpr unsigned int mmPA_SC_PACKER_WAVE_ID_CNTL = 0xA94C; - constexpr unsigned int mmPA_SC_PBB_OVERRIDE_FLAG = 0xA947; - constexpr unsigned int mmPA_SC_PKR_WAVE_TABLE_CNTL = 0xA94E; - constexpr unsigned int mmPA_SC_TILE_STEERING_CREST_OVERRIDE = 0xA949; - constexpr unsigned int mmPA_SC_TRAP_SCREEN_HV_LOCK = 0xA95D; - constexpr unsigned int mmPA_SC_VRS_OVERRIDE_CNTL = 0xA0F4; - constexpr unsigned int mmPA_SC_VRS_RATE_BASE = 0xA0FC; - constexpr unsigned int mmPA_SC_VRS_RATE_BASE_EXT = 0xA0FD; - constexpr unsigned int mmPA_SC_VRS_RATE_CACHE_CNTL = 0xA0F9; - constexpr unsigned int mmPA_SC_VRS_RATE_FEEDBACK_BASE = 0xA0F5; - constexpr unsigned int mmPA_SC_VRS_RATE_FEEDBACK_BASE_EXT = 0xA0F6; - constexpr unsigned int mmPA_SC_VRS_RATE_FEEDBACK_SIZE_XY = 0xA0F7; - constexpr unsigned int mmPA_SC_VRS_RATE_SIZE_XY = 0xA0FE; - constexpr unsigned int mmPA_SC_VRS_SURFACE_CNTL = 0xA940; - constexpr unsigned int mmPA_SC_VRS_SURFACE_CNTL_1 = 0xA960; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int 
mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_ADDR = 0xDC96; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_ADDR_OFFSET = 0xDC98; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_DATA = 0xDC97; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_32BITCNTRS_REGIONS = 0xDC9F; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_ADDR = 0xDC92; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_DATA = 0xDC93; - constexpr unsigned int mmRLC_SPM_ACCUM_SWA_DATARAM_ADDR = 0xDC94; - constexpr unsigned int mmRLC_SPM_ACCUM_SWA_DATARAM_DATA = 0xDC95; - constexpr unsigned int mmRLC_SPM_GLOBAL_DELAY_IND_ADDR = 0xED64; - constexpr unsigned int mmRLC_SPM_GLOBAL_DELAY_IND_DATA = 0xED65; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_ADDR = 0xDC88; - constexpr unsigned int mmRLC_SPM_GLOBAL_MUXSEL_DATA = 0xDC89; - constexpr unsigned int mmRLC_SPM_INT_CNTL = 0xA983; - constexpr unsigned int mmRLC_SPM_INT_INFO_1 = 0xA985; - constexpr unsigned int mmRLC_SPM_INT_INFO_2 = 0xA986; - constexpr unsigned int mmRLC_SPM_INT_STATUS = 0xA984; - constexpr unsigned int mmRLC_SPM_MC_CNTL = 0xA982; - constexpr unsigned int mmRLC_SPM_MODE = 0xDCAD; - constexpr unsigned int mmRLC_SPM_PAUSE = 0xDCA2; - constexpr unsigned int mmRLC_SPM_PERFMON_SEGMENT_SIZE = 0xDC87; - constexpr unsigned int mmRLC_SPM_RING_WRPTR = 0xDC84; - constexpr unsigned int mmRLC_SPM_RSPM_CMD = 0xDCB8; - constexpr unsigned int mmRLC_SPM_RSPM_CMD_ACK = 0xDCB9; - constexpr unsigned int mmRLC_SPM_RSPM_REQ_DATA_HI = 0xDCAF; - constexpr unsigned int mmRLC_SPM_RSPM_REQ_DATA_LO = 0xDCAE; - constexpr unsigned int mmRLC_SPM_RSPM_REQ_OP = 0xDCB0; - constexpr unsigned int mmRLC_SPM_RSPM_RET_DATA = 0xDCB1; - constexpr unsigned int mmRLC_SPM_RSPM_RET_OP = 0xDCB2; - constexpr unsigned int mmRLC_SPM_SAMPLE_CNT = 0xA981; - constexpr unsigned int mmRLC_SPM_SE_DELAY_IND_ADDR = 0xED66; - constexpr unsigned int 
mmRLC_SPM_SE_DELAY_IND_DATA = 0xED67; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_ADDR = 0xDC8A; - constexpr unsigned int mmRLC_SPM_SE_MUXSEL_DATA = 0xDC8B; - constexpr unsigned int mmRLC_SPM_SE_RSPM_REQ_DATA_HI = 0xDCB4; - constexpr unsigned int mmRLC_SPM_SE_RSPM_REQ_DATA_LO = 0xDCB3; - constexpr unsigned int mmRLC_SPM_SE_RSPM_REQ_OP = 0xDCB5; - constexpr unsigned int mmRLC_SPM_SE_RSPM_RET_DATA = 0xDCB6; - constexpr unsigned int mmRLC_SPM_SE_RSPM_RET_OP = 0xDCB7; - constexpr unsigned int mmRLC_SPM_SPARE = 0xDCBF; - constexpr unsigned int mmRLC_SPM_STATUS = 0xDCA3; - constexpr unsigned int mmSPI_ARB_CNTL_0 = 0xB949; - constexpr unsigned int mmSPI_ATTRIBUTE_RING_BASE = 0xC446; - constexpr unsigned int mmSPI_ATTRIBUTE_RING_SIZE = 0xC447; - constexpr unsigned int mmSPI_COMPUTE_QUEUE_RESET = 0x31D3; - constexpr unsigned int mmSPI_COMPUTE_WF_CTX_SAVE = 0x31D4; - constexpr unsigned int mmSPI_COMPUTE_WF_CTX_SAVE_STATUS = 0xB94E; - constexpr unsigned int mmSPI_FEATURE_CTRL = 0xB94A; - constexpr unsigned int mmSPI_GFX_SCRATCH_BASE_HI = 0xA1BC; - constexpr unsigned int mmSPI_GFX_SCRATCH_BASE_LO = 0xA1BB; - constexpr unsigned int mmSPI_GS_THROTTLE_CNTL1 = 0xC444; - constexpr unsigned int mmSPI_GS_THROTTLE_CNTL2 = 0xC445; - constexpr unsigned int mmSPI_LB_DATA_PERWGP_WAVE_PS = 0x24E6; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_0 = 0xBC00; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_1 = 0xBC01; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0xBC02; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0xBC03; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0xBC04; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0xBC05; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0xBC06; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0xBC07; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_8 = 0xBC08; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_9 = 0xBC09; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_10 = 0xBC0A; - 
constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_11 = 0xBC0B; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_12 = 0xBC0C; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_13 = 0xBC0D; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_14 = 0xBC0E; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_15 = 0xBC0F; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_0 = 0xBC10; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_1 = 0xBC11; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0xBC12; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0xBC13; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0xBC14; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0xBC15; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0xBC16; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0xBC17; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_8 = 0xBC18; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_9 = 0xBC19; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_10 = 0xBC1A; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_11 = 0xBC1B; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_12 = 0xBC1C; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_13 = 0xBC1D; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_14 = 0xBC1E; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_15 = 0xBC1F; - constexpr unsigned int mmSPI_SHADER_GS_MESHLET_DIM = 0x2CAC; - constexpr unsigned int mmSPI_SHADER_GS_MESHLET_EXP_ALLOC = 0x2CAD; - constexpr unsigned int mmSPI_SHADER_RSRC_LIMIT_CTRL = 0xB94B; - constexpr unsigned int mmSPI_USER_ACCUM_VMID_CNTL = 0x31D1; - constexpr unsigned int mmSQG_GL1H_STATUS = 0x2319; - constexpr unsigned int mmSQG_PERFCOUNTER0_HI = 0xD1E5; - constexpr unsigned int mmSQG_PERFCOUNTER0_LO = 0xD1E4; - constexpr unsigned int mmSQG_PERFCOUNTER0_SELECT = 0xD9D0; - constexpr unsigned int mmSQG_PERFCOUNTER1_HI = 0xD1E7; - constexpr unsigned int mmSQG_PERFCOUNTER1_LO = 0xD1E6; - constexpr 
unsigned int mmSQG_PERFCOUNTER1_SELECT = 0xD9D1; - constexpr unsigned int mmSQG_PERFCOUNTER2_HI = 0xD1E9; - constexpr unsigned int mmSQG_PERFCOUNTER2_LO = 0xD1E8; - constexpr unsigned int mmSQG_PERFCOUNTER2_SELECT = 0xD9D2; - constexpr unsigned int mmSQG_PERFCOUNTER3_HI = 0xD1EB; - constexpr unsigned int mmSQG_PERFCOUNTER3_LO = 0xD1EA; - constexpr unsigned int mmSQG_PERFCOUNTER3_SELECT = 0xD9D3; - constexpr unsigned int mmSQG_PERFCOUNTER4_HI = 0xD1ED; - constexpr unsigned int mmSQG_PERFCOUNTER4_LO = 0xD1EC; - constexpr unsigned int mmSQG_PERFCOUNTER4_SELECT = 0xD9D4; - constexpr unsigned int mmSQG_PERFCOUNTER5_HI = 0xD1EF; - constexpr unsigned int mmSQG_PERFCOUNTER5_LO = 0xD1EE; - constexpr unsigned int mmSQG_PERFCOUNTER5_SELECT = 0xD9D5; - constexpr unsigned int mmSQG_PERFCOUNTER6_HI = 0xD1F1; - constexpr unsigned int mmSQG_PERFCOUNTER6_LO = 0xD1F0; - constexpr unsigned int mmSQG_PERFCOUNTER6_SELECT = 0xD9D6; - constexpr unsigned int mmSQG_PERFCOUNTER7_HI = 0xD1F3; - constexpr unsigned int mmSQG_PERFCOUNTER7_LO = 0xD1F2; - constexpr unsigned int mmSQG_PERFCOUNTER7_SELECT = 0xD9D7; - constexpr unsigned int mmSQG_PERFCOUNTER_CTRL = 0xD9D8; - constexpr unsigned int mmSQG_PERFCOUNTER_CTRL2 = 0xD9DA; - constexpr unsigned int mmSQG_PERF_SAMPLE_FINISH = 0xD9DB; - constexpr unsigned int mmSQ_PERF_SNAPSHOT_CTRL = 0x231B; - constexpr unsigned int mmSQ_RUNTIME_CONFIG = 0xA9E0; - constexpr unsigned int mmSQ_SHADER_TBA_HI = 0xA9E7; - constexpr unsigned int mmSQ_SHADER_TBA_LO = 0xA9E6; - constexpr unsigned int mmSQ_SHADER_TMA_HI = 0xA9E9; - constexpr unsigned int mmSQ_SHADER_TMA_LO = 0xA9E8; - constexpr unsigned int mmTA_CNTL2 = 0x2545; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER = 0xD348; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER2 = 0xD349; - constexpr unsigned int mmTCP_PERFCOUNTER_FILTER_EN = 0xD34A; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14741; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14742; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 
0x14743; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14744; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14745; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14740; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x1474B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x1474A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x1474D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x1474C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x1474F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x1474E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14751; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14750; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14753; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14752; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14749; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14748; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54741; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54742; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54743; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54744; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54745; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x5474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x5474A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x5474D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x5474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x5474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x5474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54751; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54750; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54753; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54752; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54749; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54748; - constexpr unsigned int 
mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94741; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94742; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94743; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94744; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94745; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94740; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x9474B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x9474A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x9474D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x9474C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x9474F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x9474E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94751; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94750; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94753; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94752; - constexpr 
unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94749; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94748; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4741; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4742; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4743; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4744; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4745; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4740; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD474B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD474A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD474D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD474C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD474F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD474E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4751; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 
0xD4750; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4753; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4752; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4749; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4748; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114741; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114742; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114743; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114744; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114745; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114740; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x11474B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x11474A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x11474D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x11474C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x11474F; - 
constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x11474E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114751; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114750; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114753; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114752; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114749; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114748; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154741; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154742; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154743; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154744; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154745; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x154740; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x15474B; - constexpr unsigned int 
mmUMCCH11_PerfMonCtr1_Lo = 0x15474A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x15474D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x15474C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x15474F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x15474E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154751; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154750; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154753; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154752; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154749; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154748; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0x194341; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0x194342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0x19434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0x194741; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0x194742; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0x194743; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 
0x194744; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0x194745; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0x194740; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0x19474B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0x19474A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0x19474D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0x19474C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0x19474F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0x19474E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0x194751; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0x194750; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0x194753; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0x194752; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0x194749; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0x194748; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0x1D4343; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0x1D4348; - constexpr 
unsigned int mmUMCCH15_PerfMonCtl1 = 0x1D4741; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0x1D4742; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0x1D4743; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0x1D4744; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0x1D4745; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0x1D4740; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0x1D474B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0x1D474A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0x1D474D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0x1D474C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0x1D474F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0x1D474E; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 0x1D4751; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0x1D4750; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0x1D4753; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0x1D4752; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0x1D4749; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0x1D4748; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_HI = 0xD5A1; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_LO = 0xD5A0; - constexpr unsigned int mmUTCL1_PERFCOUNTER0_SELECT = 0xDDA0; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_HI = 0xD5A3; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_LO = 0xD5A2; - constexpr unsigned int mmUTCL1_PERFCOUNTER1_SELECT = 0xDDA1; - constexpr unsigned int mmUTCL1_PERFCOUNTER2_HI = 0xD5A5; - constexpr unsigned int mmUTCL1_PERFCOUNTER2_LO = 0xD5A4; - constexpr unsigned int mmUTCL1_PERFCOUNTER2_SELECT = 0xDDA2; - constexpr unsigned int mmUTCL1_PERFCOUNTER3_HI = 0xD5A7; - constexpr unsigned int mmUTCL1_PERFCOUNTER3_LO = 0xD5A6; - constexpr unsigned int mmUTCL1_PERFCOUNTER3_SELECT = 0xDDA3; - constexpr unsigned int mmVGT_GS_OUT_PRIM_TYPE = 0xC266; -} // namespace Gfx11 -#endif - -namespace HasHwVs -{ - constexpr unsigned int mmSPI_SHADER_LATE_ALLOC_VS = 0x2C47; - 
constexpr unsigned int mmSPI_SHADER_PGM_HI_VS = 0x2C49; - constexpr unsigned int mmSPI_SHADER_PGM_LO_VS = 0x2C48; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC1_VS = 0x2C4A; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC2_VS = 0x2C4B; - constexpr unsigned int mmSPI_SHADER_PGM_RSRC3_VS = 0x2C46; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_0 = 0x2C4C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_1 = 0x2C4D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_2 = 0x2C4E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_3 = 0x2C4F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_4 = 0x2C50; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_5 = 0x2C51; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_6 = 0x2C52; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_7 = 0x2C53; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_8 = 0x2C54; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_9 = 0x2C55; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_10 = 0x2C56; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_11 = 0x2C57; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_12 = 0x2C58; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_13 = 0x2C59; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_14 = 0x2C5A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_15 = 0x2C5B; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_16 = 0x2C5C; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_17 = 0x2C5D; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_18 = 0x2C5E; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_19 = 0x2C5F; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_20 = 0x2C60; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_21 = 0x2C61; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_22 = 0x2C62; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_23 = 0x2C63; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_24 = 0x2C64; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_25 = 0x2C65; - constexpr unsigned int 
mmSPI_SHADER_USER_DATA_VS_26 = 0x2C66; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_27 = 0x2C67; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_28 = 0x2C68; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_29 = 0x2C69; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_30 = 0x2C6A; - constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_31 = 0x2C6B; - constexpr unsigned int mmVGT_DMA_CONTROL = 0x2272; - constexpr unsigned int mmVGT_DMA_EVENT_INITIATOR = 0xA2E7; - constexpr unsigned int mmVGT_DMA_LS_HS_CONFIG = 0x2273; - constexpr unsigned int mmVGT_DMA_PRIMITIVE_TYPE = 0x2271; - constexpr unsigned int mmVGT_GSVS_RING_ITEMSIZE = 0xA2AC; - constexpr unsigned int mmVGT_GSVS_RING_OFFSET_1 = 0xA298; - constexpr unsigned int mmVGT_GSVS_RING_OFFSET_2 = 0xA299; - constexpr unsigned int mmVGT_GSVS_RING_OFFSET_3 = 0xA29A; - constexpr unsigned int mmVGT_GS_MODE = 0xA290; - constexpr unsigned int mmVGT_GS_PER_ES = 0xA295; - constexpr unsigned int mmVGT_GS_PER_VS = 0xA297; - constexpr unsigned int mmVGT_GS_VERTEX_REUSE = 0x2235; - constexpr unsigned int mmVGT_GS_VERT_ITEMSIZE = 0xA2D7; - constexpr unsigned int mmVGT_GS_VERT_ITEMSIZE_1 = 0xA2D8; - constexpr unsigned int mmVGT_GS_VERT_ITEMSIZE_2 = 0xA2D9; - constexpr unsigned int mmVGT_GS_VERT_ITEMSIZE_3 = 0xA2DA; - constexpr unsigned int mmVGT_OUT_DEALLOC_CNTL = 0xA317; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_CONFIG = 0xA2E6; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0xC244; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0xC245; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0xC246; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0xC247; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_OFFSET_0 = 0xA2B7; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_OFFSET_1 = 0xA2BB; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_OFFSET_2 = 0xA2BF; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_OFFSET_3 = 0xA2C3; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_SIZE_0 = 
0xA2B4; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_SIZE_1 = 0xA2B8; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_SIZE_2 = 0xA2BC; - constexpr unsigned int mmVGT_STRMOUT_BUFFER_SIZE_3 = 0xA2C0; - constexpr unsigned int mmVGT_STRMOUT_CONFIG = 0xA2E5; - constexpr unsigned int mmVGT_STRMOUT_VTX_STRIDE_0 = 0xA2B5; - constexpr unsigned int mmVGT_STRMOUT_VTX_STRIDE_1 = 0xA2B9; - constexpr unsigned int mmVGT_STRMOUT_VTX_STRIDE_2 = 0xA2BD; - constexpr unsigned int mmVGT_STRMOUT_VTX_STRIDE_3 = 0xA2C1; - constexpr unsigned int mmVGT_VERTEX_REUSE_BLOCK_CNTL = 0xA316; -} // namespace HasHwVs - -namespace NotGfx10 -{ - constexpr unsigned int mmSPI_CONFIG_CNTL = 0xC440; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0xC442; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS4 = 0x31CD; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS5 = 0x31CE; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS6 = 0x31CF; - constexpr unsigned int mmSPI_WCL_PIPE_PERCENT_CS7 = 0x31D0; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0xC24E; -} // namespace NotGfx10 - -#if CHIP_HDR_NAVI21 -namespace Nv21 -{ - constexpr unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int 
mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int 
mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xDE03; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xDE04; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xD643; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xD642; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xDE05; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA4; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA5; - constexpr unsigned int mmSDMA1_PERFCNT_MISC_CNTL = 0xDE2F; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER0_CFG = 0xDE2C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER1_CFG = 0xDE2D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_HI 
= 0xD66D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_LO = 0xD66C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE2E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_HI = 0xD66F; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_LO = 0xD66E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT = 0xDE30; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT1 = 0xDE31; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_HI = 0xD671; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_LO = 0xD670; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT = 0xDE32; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT1 = 0xDE33; - constexpr unsigned int mmSDMA2_PERFCNT_MISC_CNTL = 0xDE3B; - constexpr unsigned int mmSDMA2_PERFCNT_PERFCOUNTER0_CFG = 0xDE38; - constexpr unsigned int mmSDMA2_PERFCNT_PERFCOUNTER1_CFG = 0xDE39; - constexpr unsigned int mmSDMA2_PERFCNT_PERFCOUNTER_HI = 0xD679; - constexpr unsigned int mmSDMA2_PERFCNT_PERFCOUNTER_LO = 0xD678; - constexpr unsigned int mmSDMA2_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE3A; - constexpr unsigned int mmSDMA2_PERFCOUNTER0_HI = 0xD67B; - constexpr unsigned int mmSDMA2_PERFCOUNTER0_LO = 0xD67A; - constexpr unsigned int mmSDMA2_PERFCOUNTER0_SELECT = 0xDE3C; - constexpr unsigned int mmSDMA2_PERFCOUNTER0_SELECT1 = 0xDE3D; - constexpr unsigned int mmSDMA2_PERFCOUNTER1_HI = 0xD67D; - constexpr unsigned int mmSDMA2_PERFCOUNTER1_LO = 0xD67C; - constexpr unsigned int mmSDMA2_PERFCOUNTER1_SELECT = 0xDE3E; - constexpr unsigned int mmSDMA2_PERFCOUNTER1_SELECT1 = 0xDE3F; - constexpr unsigned int mmSDMA3_PERFCNT_MISC_CNTL = 0xDE47; - constexpr unsigned int mmSDMA3_PERFCNT_PERFCOUNTER0_CFG = 0xDE44; - constexpr unsigned int mmSDMA3_PERFCNT_PERFCOUNTER1_CFG = 0xDE45; - constexpr unsigned int mmSDMA3_PERFCNT_PERFCOUNTER_HI = 0xD685; - constexpr unsigned int mmSDMA3_PERFCNT_PERFCOUNTER_LO = 0xD684; - constexpr unsigned int mmSDMA3_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE46; - constexpr unsigned int mmSDMA3_PERFCOUNTER0_HI = 0xD687; - 
constexpr unsigned int mmSDMA3_PERFCOUNTER0_LO = 0xD686; - constexpr unsigned int mmSDMA3_PERFCOUNTER0_SELECT = 0xDE48; - constexpr unsigned int mmSDMA3_PERFCOUNTER0_SELECT1 = 0xDE49; - constexpr unsigned int mmSDMA3_PERFCOUNTER1_HI = 0xD689; - constexpr unsigned int mmSDMA3_PERFCOUNTER1_LO = 0xD688; - constexpr unsigned int mmSDMA3_PERFCOUNTER1_SELECT = 0xDE4A; - constexpr unsigned int mmSDMA3_PERFCOUNTER1_SELECT1 = 0xDE4B; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0x244C; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0x244F; - constexpr unsigned int mmSPI_CONFIG_CNTL_1_REMAP = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0x244E; - constexpr unsigned int mmSPI_CONFIG_CNTL_2_REMAP = 0xC442; - constexpr unsigned int mmSPI_CONFIG_CNTL_REMAP = 0xC440; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_8 = 0x31E4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_9 = 0x31E5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_8 = 0x31EE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_9 = 0x31EF; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0x244D; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL_REMAP = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14741; - constexpr 
unsigned int mmUMCCH1_PerfMonCtl2 = 0x14742; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14743; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14744; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14745; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14740; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x1474B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x1474A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x1474D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x1474C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x1474F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x1474E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14751; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14750; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14753; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14752; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14749; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14748; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54741; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54742; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54743; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54744; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54745; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x5474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x5474A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x5474D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x5474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x5474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x5474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54751; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54750; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54753; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54752; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 
0x54749; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54748; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94741; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94742; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94743; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94744; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94745; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94740; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x9474B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x9474A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x9474D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x9474C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x9474F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x9474E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94751; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94750; - constexpr unsigned int 
mmUMCCH5_PerfMonCtr5_Hi = 0x94753; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94752; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94749; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94748; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4741; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4742; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4743; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4744; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4745; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4740; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD474B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD474A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD474D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD474C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD474F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD474E; - 
constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4751; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4750; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4753; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4752; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4749; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4748; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114741; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114742; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114743; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114744; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114745; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114740; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x11474B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x11474A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x11474D; - constexpr 
unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x11474C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x11474F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x11474E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114751; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114750; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114753; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114752; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114749; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114748; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154741; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154742; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154743; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154744; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154745; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 
0x154740; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x15474B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x15474A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x15474D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x15474C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x15474F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x15474E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154751; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154750; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154753; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154752; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154749; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154748; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0x194341; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0x194342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0x19434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0x194741; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0x194742; - constexpr 
unsigned int mmUMCCH13_PerfMonCtl3 = 0x194743; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0x194744; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0x194745; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0x194740; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0x19474B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0x19474A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0x19474D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0x19474C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0x19474F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0x19474E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0x194751; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0x194750; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0x194753; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0x194752; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0x194749; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0x194748; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0x1D4343; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int 
mmUMCCH14_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0x1D4348; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0x1D4741; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0x1D4742; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0x1D4743; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0x1D4744; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0x1D4745; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0x1D4740; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0x1D474B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0x1D474A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0x1D474D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0x1D474C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0x1D474F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0x1D474E; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 0x1D4751; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0x1D4750; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0x1D4753; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0x1D4752; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0x1D4749; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0x1D4748; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0x2221; - constexpr unsigned int mmVGT_ESGS_RING_SIZE_UMD = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0x2222; - constexpr unsigned int mmVGT_GSVS_RING_SIZE_UMD = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0x2224; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM_UMD = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0x2225; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0x2226; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI_UMD = 0xC261; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_UMD = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0x2223; - constexpr unsigned int mmVGT_TF_RING_SIZE_UMD = 0xC24E; -} // namespace Nv21 -#endif - -#if CHIP_HDR_NAVI22 -namespace Nv22 -{ - constexpr 
unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 
0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xDE03; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xDE04; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xD643; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xD642; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xDE05; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int 
mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA4; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA5; - constexpr unsigned int mmSDMA1_PERFCNT_MISC_CNTL = 0xDE2F; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER0_CFG = 0xDE2C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER1_CFG = 0xDE2D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_HI = 0xD66D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_LO = 0xD66C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE2E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_HI = 0xD66F; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_LO = 0xD66E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT = 0xDE30; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT1 = 0xDE31; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_HI = 0xD671; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_LO = 0xD670; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT = 0xDE32; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT1 = 0xDE33; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0x244C; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0x244F; - constexpr unsigned int mmSPI_CONFIG_CNTL_1_REMAP = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0x244E; - constexpr unsigned int mmSPI_CONFIG_CNTL_2_REMAP = 0xC442; - constexpr unsigned int mmSPI_CONFIG_CNTL_REMAP = 0xC440; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int 
mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_8 = 0x31E4; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_9 = 0x31E5; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_8 = 0x31EE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_9 = 0x31EF; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0x244D; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL_REMAP = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14741; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14742; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14743; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14744; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14745; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14740; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x1474B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x1474A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x1474D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x1474C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x1474F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x1474E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14751; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14750; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14753; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14752; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14749; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14748; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54741; 
- constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54742; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54743; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54744; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54745; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x5474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x5474A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x5474D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x5474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x5474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x5474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54751; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54750; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54753; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54752; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54749; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54748; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int 
mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94741; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94742; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94743; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94744; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94745; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94740; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x9474B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x9474A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x9474D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x9474C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x9474F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x9474E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94751; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94750; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94753; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94752; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94749; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94748; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - 
constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4741; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4742; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4743; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4744; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4745; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4740; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD474B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD474A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD474D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD474C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD474F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD474E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4751; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4750; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4753; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4752; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4749; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4748; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int 
mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114741; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114742; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114743; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114744; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114745; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114740; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x11474B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x11474A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x11474D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x11474C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x11474F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x11474E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114751; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114750; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114753; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114752; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114749; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114748; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int 
mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154741; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154742; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154743; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154744; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154745; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x154740; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x15474B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x15474A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x15474D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x15474C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x15474F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x15474E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154751; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154750; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154753; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154752; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154749; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154748; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0x2221; - constexpr unsigned int mmVGT_ESGS_RING_SIZE_UMD = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0x2222; - constexpr unsigned int mmVGT_GSVS_RING_SIZE_UMD = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0x2224; - 
constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM_UMD = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0x2225; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0x2226; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI_UMD = 0xC261; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_UMD = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0x2223; - constexpr unsigned int mmVGT_TF_RING_SIZE_UMD = 0xC24E; -} // namespace Nv22 -#endif - -#if CHIP_HDR_NAVI23 -namespace Nv23 -{ - constexpr unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; 
- constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xDE03; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xDE04; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xD643; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xD642; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xDE05; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr 
unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA5; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA4; - constexpr unsigned int mmSDMA1_PERFCNT_MISC_CNTL = 0xDE2F; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER0_CFG = 0xDE2C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER1_CFG = 0xDE2D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_HI = 0xD66D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_LO = 0xD66C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE2E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_HI = 0xD66F; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_LO = 0xD66E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT = 0xDE30; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT1 = 0xDE31; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_HI = 0xD671; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_LO = 0xD670; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT = 0xDE32; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT1 = 0xDE33; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0x244C; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0x244F; - constexpr unsigned int 
mmSPI_CONFIG_CNTL_1_REMAP = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0x244E; - constexpr unsigned int mmSPI_CONFIG_CNTL_2_REMAP = 0xC442; - constexpr unsigned int mmSPI_CONFIG_CNTL_REMAP = 0xC440; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0x244D; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL_REMAP = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14741; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14742; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14743; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14744; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14745; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14740; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x1474B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x1474A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x1474D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x1474C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x1474F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x1474E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14751; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14750; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14753; - 
constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14752; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14749; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14748; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54741; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54742; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54743; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54744; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54745; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x5474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x5474A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x5474D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x5474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x5474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x5474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54751; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54750; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54753; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54752; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54749; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54748; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int 
mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94741; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94742; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94743; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94744; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94745; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94740; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x9474B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x9474A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x9474D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x9474C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x9474F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x9474E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94751; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94750; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94753; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94752; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94749; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94748; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - 
constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4741; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4742; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4743; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4744; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4745; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4740; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD474B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD474A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD474D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD474C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD474F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD474E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4751; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4750; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4753; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4752; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4749; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4748; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0x2221; - constexpr unsigned int mmVGT_ESGS_RING_SIZE_UMD = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0x2222; - constexpr unsigned int mmVGT_GSVS_RING_SIZE_UMD = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0x2224; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM_UMD = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0x2225; - constexpr unsigned int 
mmVGT_TF_MEMORY_BASE_HI = 0x2226; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI_UMD = 0xC261; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_UMD = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0x2223; - constexpr unsigned int mmVGT_TF_RING_SIZE_UMD = 0xC24E; -} // namespace Nv23 -#endif - -#if CHIP_HDR_NAVI24 -namespace Nv24 -{ - constexpr unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr 
unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xDE03; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xDE04; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xD643; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xD642; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xDE05; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - 
constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA5; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA4; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0x244C; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0x244F; - constexpr unsigned int mmSPI_CONFIG_CNTL_1_REMAP = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0x244E; - constexpr unsigned int mmSPI_CONFIG_CNTL_2_REMAP = 0xC442; - constexpr unsigned int mmSPI_CONFIG_CNTL_REMAP = 0xC440; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_6 = 0x31E2; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_7 = 0x31E3; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_6 = 0x31EC; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_7 = 0x31ED; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0x244D; - 
constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL_REMAP = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14741; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14742; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14743; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14744; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14745; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14740; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x1474B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x1474A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x1474D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x1474C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x1474F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x1474E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14751; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14750; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14753; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14752; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14749; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14748; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54741; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54742; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54743; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54744; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54745; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x5474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x5474A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x5474D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x5474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x5474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x5474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54751; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54750; - constexpr unsigned int 
mmUMCCH3_PerfMonCtr5_Hi = 0x54753; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54752; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54749; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54748; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0x2221; - constexpr unsigned int mmVGT_ESGS_RING_SIZE_UMD = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0x2222; - constexpr unsigned int mmVGT_GSVS_RING_SIZE_UMD = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0x2224; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM_UMD = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0x2225; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0x2226; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI_UMD = 0xC261; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_UMD = 0xC250; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0x2223; - constexpr unsigned int mmVGT_TF_RING_SIZE_UMD = 0xC24E; -} // namespace Nv24 -#endif - -#if CHIP_HDR_NAVI31 -namespace Nv31 -{ - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x12400C81; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x12400C83; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x12400C85; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x12400C87; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x12400C89; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x12400C8B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x12400C8D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x12400C8F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x12400C80; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x12400C82; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x12400C84; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x12400C86; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x12400C88; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x12400C8A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x12400C8C; - constexpr 
unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x12400C8E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x12400CC1; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x12400CC3; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x12400CC5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x12400CC7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x12400CC9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x12400CCB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x12400CCD; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x12400CCF; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x12400CC0; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x12400CC2; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x12400CC4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x12400CC6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x12400CC8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x12400CCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x12400CCC; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x12400CCE; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_HI = 0xD04C; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_LO = 0xD04B; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_SELECT = 0xD845; - constexpr unsigned int mmGRBM_SE4_PERFCOUNTER_HI = 0xD04E; - constexpr unsigned int mmGRBM_SE4_PERFCOUNTER_LO = 0xD04D; - constexpr unsigned int mmGRBM_SE4_PERFCOUNTER_SELECT = 0xD846; - constexpr unsigned int mmGRBM_SE5_PERFCOUNTER_HI = 0xD050; - constexpr unsigned int mmGRBM_SE5_PERFCOUNTER_LO = 0xD04F; - constexpr unsigned int mmGRBM_SE5_PERFCOUNTER_SELECT = 0xD847; - constexpr unsigned int mmGRBM_SE6_PERFCOUNTER_HI = 0xD052; - constexpr unsigned int mmGRBM_SE6_PERFCOUNTER_LO = 0xD051; - constexpr unsigned int mmGRBM_SE6_PERFCOUNTER_SELECT = 0xD848; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmPerfMonCtl1 = 0xC400344; - constexpr unsigned int 
mmPerfMonCtl2 = 0xC400347; - constexpr unsigned int mmPerfMonCtl3 = 0xC40034A; - constexpr unsigned int mmPerfMonCtl4 = 0xC40034D; - constexpr unsigned int mmPerfMonCtl5 = 0xC400350; - constexpr unsigned int mmPerfMonCtl6 = 0xC400353; - constexpr unsigned int mmPerfMonCtl7 = 0xC400356; - constexpr unsigned int mmPerfMonCtl8 = 0xC400359; - constexpr unsigned int mmPerfMonCtl9 = 0xC40035C; - constexpr unsigned int mmPerfMonCtl10 = 0xC40035F; - constexpr unsigned int mmPerfMonCtl11 = 0xC400362; - constexpr unsigned int mmPerfMonCtl12 = 0xC400365; - constexpr unsigned int mmPerfMonCtlClk = 0xC400340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0xC400346; - constexpr unsigned int mmPerfMonCtr1_Lo = 0xC400345; - constexpr unsigned int mmPerfMonCtr2_Hi = 0xC400349; - constexpr unsigned int mmPerfMonCtr2_Lo = 0xC400348; - constexpr unsigned int mmPerfMonCtr3_Hi = 0xC40034C; - constexpr unsigned int mmPerfMonCtr3_Lo = 0xC40034B; - constexpr unsigned int mmPerfMonCtr4_Hi = 0xC40034F; - constexpr unsigned int mmPerfMonCtr4_Lo = 0xC40034E; - constexpr unsigned int mmPerfMonCtr5_Hi = 0xC400352; - constexpr unsigned int mmPerfMonCtr5_Lo = 0xC400351; - constexpr unsigned int mmPerfMonCtr6_Hi = 0xC400355; - constexpr unsigned int mmPerfMonCtr6_Lo = 0xC400354; - constexpr unsigned int mmPerfMonCtr7_Hi = 0xC400358; - constexpr unsigned int mmPerfMonCtr7_Lo = 0xC400357; - constexpr unsigned int mmPerfMonCtr8_Hi = 0xC40035B; - constexpr unsigned int mmPerfMonCtr8_Lo = 0xC40035A; - constexpr unsigned int mmPerfMonCtr9_Hi = 0xC40035E; - constexpr unsigned int mmPerfMonCtr9_Lo = 0xC40035D; - constexpr unsigned int mmPerfMonCtr10_Hi = 0xC400361; - constexpr unsigned int mmPerfMonCtr10_Lo = 0xC400360; - constexpr unsigned int mmPerfMonCtr11_Hi = 0xC400364; - constexpr unsigned int mmPerfMonCtr11_Lo = 0xC400363; - constexpr unsigned int mmPerfMonCtr12_Hi = 0xC400367; - constexpr unsigned int mmPerfMonCtr12_Lo = 0xC400366; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0xC400342; - constexpr 
unsigned int mmPerfMonCtrClk_Lo = 0xC400341; - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0C82; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0C83; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0C84; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0C85; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0C81; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0C80; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0C86; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0C7E; - constexpr unsigned int mmUMCCH0_PerfMonCtl1 = 0xC400344; - constexpr unsigned int mmUMCCH0_PerfMonCtl2 = 0xC400347; - constexpr unsigned int mmUMCCH0_PerfMonCtl3 = 0xC40034A; - constexpr unsigned int mmUMCCH0_PerfMonCtl4 = 0xC40034D; - constexpr unsigned int mmUMCCH0_PerfMonCtl5 = 0xC400350; - constexpr unsigned int mmUMCCH0_PerfMonCtlClk = 0xC400340; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Hi = 0xC400346; - constexpr unsigned int mmUMCCH0_PerfMonCtr1_Lo = 0xC400345; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Hi = 0xC400349; - constexpr unsigned int mmUMCCH0_PerfMonCtr2_Lo = 0xC400348; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Hi = 0xC40034C; - constexpr unsigned int mmUMCCH0_PerfMonCtr3_Lo = 0xC40034B; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Hi = 0xC40034F; - constexpr unsigned int mmUMCCH0_PerfMonCtr4_Lo = 0xC40034E; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Hi = 0xC400352; - constexpr unsigned int mmUMCCH0_PerfMonCtr5_Lo = 0xC400351; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Hi = 0xC400342; - constexpr unsigned int mmUMCCH0_PerfMonCtrClk_Lo = 0xC400341; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0xC400744; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0xC400747; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0xC40074A; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0xC40074D; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0xC400750; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0xC400740; - constexpr unsigned int 
mmUMCCH1_PerfMonCtr1_Hi = 0xC400746; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0xC400745; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0xC400749; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0xC400748; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0xC40074C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0xC40074B; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0xC40074F; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0xC40074E; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0xC400752; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0xC400751; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0xC400742; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0xC400741; - constexpr unsigned int mmUMCCH2_PerfMonCtl1 = 0xC404344; - constexpr unsigned int mmUMCCH2_PerfMonCtl2 = 0xC404347; - constexpr unsigned int mmUMCCH2_PerfMonCtl3 = 0xC40434A; - constexpr unsigned int mmUMCCH2_PerfMonCtl4 = 0xC40434D; - constexpr unsigned int mmUMCCH2_PerfMonCtl5 = 0xC404350; - constexpr unsigned int mmUMCCH2_PerfMonCtlClk = 0xC404340; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Hi = 0xC404346; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Lo = 0xC404345; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Hi = 0xC404349; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Lo = 0xC404348; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Hi = 0xC40434C; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Lo = 0xC40434B; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Hi = 0xC40434F; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Lo = 0xC40434E; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Hi = 0xC404352; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Lo = 0xC404351; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Hi = 0xC404342; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Lo = 0xC404341; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0xC404744; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0xC404747; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 
0xC40474A; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0xC40474D; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0xC404750; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0xC404740; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0xC404746; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0xC404745; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0xC404749; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0xC404748; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0xC40474C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0xC40474B; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0xC40474F; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0xC40474E; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0xC404752; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0xC404751; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0xC404742; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0xC404741; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0xCC00344; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0xCC00347; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0xCC0034A; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0xCC0034D; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0xCC00350; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0xCC00340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0xCC00346; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0xCC00345; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0xCC00349; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0xCC00348; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0xCC0034C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0xCC0034B; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0xCC0034F; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0xCC0034E; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0xCC00352; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0xCC00351; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0xCC00342; - constexpr unsigned 
int mmUMCCH4_PerfMonCtrClk_Lo = 0xCC00341; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0xCC00744; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0xCC00747; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0xCC0074A; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0xCC0074D; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0xCC00750; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0xCC00740; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0xCC00746; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0xCC00745; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0xCC00749; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0xCC00748; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0xCC0074C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0xCC0074B; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0xCC0074F; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0xCC0074E; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0xCC00752; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0xCC00751; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0xCC00742; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0xCC00741; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xCC04344; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xCC04347; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xCC0434A; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xCC0434D; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xCC04350; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xCC04340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xCC04346; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xCC04345; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xCC04349; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xCC04348; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xCC0434C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xCC0434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xCC0434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xCC0434E; 
- constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xCC04352; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xCC04351; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xCC04342; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xCC04341; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xCC04744; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xCC04747; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xCC0474A; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xCC0474D; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xCC04750; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xCC04740; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xCC04746; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xCC04745; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xCC04749; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xCC04748; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xCC0474C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xCC0474B; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xCC0474F; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xCC0474E; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xCC04752; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xCC04751; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xCC04742; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xCC04741; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0xD400344; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0xD400347; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0xD40034A; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0xD40034D; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0xD400350; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0xD400340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0xD400346; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0xD400345; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0xD400349; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0xD400348; - constexpr unsigned int 
mmUMCCH8_PerfMonCtr3_Hi = 0xD40034C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0xD40034B; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0xD40034F; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0xD40034E; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0xD400352; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0xD400351; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0xD400342; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0xD400341; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0xD400744; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0xD400747; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0xD40074A; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0xD40074D; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0xD400750; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0xD400740; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0xD400746; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0xD400745; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0xD400749; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0xD400748; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0xD40074C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0xD40074B; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0xD40074F; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0xD40074E; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0xD400752; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0xD400751; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0xD400742; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0xD400741; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0xD404344; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0xD404347; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0xD40434A; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0xD40434D; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0xD404350; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0xD404340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 
0xD404346; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0xD404345; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0xD404349; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0xD404348; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0xD40434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0xD40434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0xD40434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0xD40434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0xD404352; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0xD404351; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0xD404342; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0xD404341; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0xD404744; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0xD404747; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0xD40474A; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0xD40474D; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0xD404750; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0xD404740; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0xD404746; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0xD404745; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0xD404749; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0xD404748; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0xD40474C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0xD40474B; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0xD40474F; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0xD40474E; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0xD404752; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0xD404751; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0xD404742; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0xD404741; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0xDC00344; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0xDC00347; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 
0xDC0034A; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0xDC0034D; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0xDC00350; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0xDC00340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0xDC00346; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0xDC00345; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0xDC00349; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0xDC00348; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0xDC0034C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0xDC0034B; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0xDC0034F; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0xDC0034E; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0xDC00352; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0xDC00351; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0xDC00342; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0xDC00341; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0xDC00744; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0xDC00747; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0xDC0074A; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0xDC0074D; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0xDC00750; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0xDC00740; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0xDC00746; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0xDC00745; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0xDC00749; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0xDC00748; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0xDC0074C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0xDC0074B; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0xDC0074F; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0xDC0074E; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0xDC00752; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0xDC00751; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 
0xDC00742; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0xDC00741; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0xDC04344; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0xDC04347; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0xDC0434A; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0xDC0434D; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0xDC04350; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0xDC04340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0xDC04346; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0xDC04345; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0xDC04349; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0xDC04348; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0xDC0434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0xDC0434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0xDC0434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0xDC0434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0xDC04352; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0xDC04351; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0xDC04342; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0xDC04341; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0xDC04744; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0xDC04747; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0xDC0474A; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0xDC0474D; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0xDC04750; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0xDC04740; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0xDC04746; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0xDC04745; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0xDC04749; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0xDC04748; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0xDC0474C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0xDC0474B; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 
0xDC0474F; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0xDC0474E; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0xDC04752; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0xDC04751; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0xDC04742; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0xDC04741; - constexpr unsigned int mmUMCCH16_PerfMonCtl1 = 0xE400344; - constexpr unsigned int mmUMCCH16_PerfMonCtl2 = 0xE400347; - constexpr unsigned int mmUMCCH16_PerfMonCtl3 = 0xE40034A; - constexpr unsigned int mmUMCCH16_PerfMonCtl4 = 0xE40034D; - constexpr unsigned int mmUMCCH16_PerfMonCtl5 = 0xE400350; - constexpr unsigned int mmUMCCH16_PerfMonCtlClk = 0xE400340; - constexpr unsigned int mmUMCCH16_PerfMonCtr1_Hi = 0xE400346; - constexpr unsigned int mmUMCCH16_PerfMonCtr1_Lo = 0xE400345; - constexpr unsigned int mmUMCCH16_PerfMonCtr2_Hi = 0xE400349; - constexpr unsigned int mmUMCCH16_PerfMonCtr2_Lo = 0xE400348; - constexpr unsigned int mmUMCCH16_PerfMonCtr3_Hi = 0xE40034C; - constexpr unsigned int mmUMCCH16_PerfMonCtr3_Lo = 0xE40034B; - constexpr unsigned int mmUMCCH16_PerfMonCtr4_Hi = 0xE40034F; - constexpr unsigned int mmUMCCH16_PerfMonCtr4_Lo = 0xE40034E; - constexpr unsigned int mmUMCCH16_PerfMonCtr5_Hi = 0xE400352; - constexpr unsigned int mmUMCCH16_PerfMonCtr5_Lo = 0xE400351; - constexpr unsigned int mmUMCCH16_PerfMonCtrClk_Hi = 0xE400342; - constexpr unsigned int mmUMCCH16_PerfMonCtrClk_Lo = 0xE400341; - constexpr unsigned int mmUMCCH17_PerfMonCtl1 = 0xE400744; - constexpr unsigned int mmUMCCH17_PerfMonCtl2 = 0xE400747; - constexpr unsigned int mmUMCCH17_PerfMonCtl3 = 0xE40074A; - constexpr unsigned int mmUMCCH17_PerfMonCtl4 = 0xE40074D; - constexpr unsigned int mmUMCCH17_PerfMonCtl5 = 0xE400750; - constexpr unsigned int mmUMCCH17_PerfMonCtlClk = 0xE400740; - constexpr unsigned int mmUMCCH17_PerfMonCtr1_Hi = 0xE400746; - constexpr unsigned int mmUMCCH17_PerfMonCtr1_Lo = 0xE400745; - constexpr unsigned int mmUMCCH17_PerfMonCtr2_Hi = 
0xE400749; - constexpr unsigned int mmUMCCH17_PerfMonCtr2_Lo = 0xE400748; - constexpr unsigned int mmUMCCH17_PerfMonCtr3_Hi = 0xE40074C; - constexpr unsigned int mmUMCCH17_PerfMonCtr3_Lo = 0xE40074B; - constexpr unsigned int mmUMCCH17_PerfMonCtr4_Hi = 0xE40074F; - constexpr unsigned int mmUMCCH17_PerfMonCtr4_Lo = 0xE40074E; - constexpr unsigned int mmUMCCH17_PerfMonCtr5_Hi = 0xE400752; - constexpr unsigned int mmUMCCH17_PerfMonCtr5_Lo = 0xE400751; - constexpr unsigned int mmUMCCH17_PerfMonCtrClk_Hi = 0xE400742; - constexpr unsigned int mmUMCCH17_PerfMonCtrClk_Lo = 0xE400741; - constexpr unsigned int mmUMCCH18_PerfMonCtl1 = 0xE404344; - constexpr unsigned int mmUMCCH18_PerfMonCtl2 = 0xE404347; - constexpr unsigned int mmUMCCH18_PerfMonCtl3 = 0xE40434A; - constexpr unsigned int mmUMCCH18_PerfMonCtl4 = 0xE40434D; - constexpr unsigned int mmUMCCH18_PerfMonCtl5 = 0xE404350; - constexpr unsigned int mmUMCCH18_PerfMonCtlClk = 0xE404340; - constexpr unsigned int mmUMCCH18_PerfMonCtr1_Hi = 0xE404346; - constexpr unsigned int mmUMCCH18_PerfMonCtr1_Lo = 0xE404345; - constexpr unsigned int mmUMCCH18_PerfMonCtr2_Hi = 0xE404349; - constexpr unsigned int mmUMCCH18_PerfMonCtr2_Lo = 0xE404348; - constexpr unsigned int mmUMCCH18_PerfMonCtr3_Hi = 0xE40434C; - constexpr unsigned int mmUMCCH18_PerfMonCtr3_Lo = 0xE40434B; - constexpr unsigned int mmUMCCH18_PerfMonCtr4_Hi = 0xE40434F; - constexpr unsigned int mmUMCCH18_PerfMonCtr4_Lo = 0xE40434E; - constexpr unsigned int mmUMCCH18_PerfMonCtr5_Hi = 0xE404352; - constexpr unsigned int mmUMCCH18_PerfMonCtr5_Lo = 0xE404351; - constexpr unsigned int mmUMCCH18_PerfMonCtrClk_Hi = 0xE404342; - constexpr unsigned int mmUMCCH18_PerfMonCtrClk_Lo = 0xE404341; - constexpr unsigned int mmUMCCH19_PerfMonCtl1 = 0xE404744; - constexpr unsigned int mmUMCCH19_PerfMonCtl2 = 0xE404747; - constexpr unsigned int mmUMCCH19_PerfMonCtl3 = 0xE40474A; - constexpr unsigned int mmUMCCH19_PerfMonCtl4 = 0xE40474D; - constexpr unsigned int mmUMCCH19_PerfMonCtl5 = 
0xE404750; - constexpr unsigned int mmUMCCH19_PerfMonCtlClk = 0xE404740; - constexpr unsigned int mmUMCCH19_PerfMonCtr1_Hi = 0xE404746; - constexpr unsigned int mmUMCCH19_PerfMonCtr1_Lo = 0xE404745; - constexpr unsigned int mmUMCCH19_PerfMonCtr2_Hi = 0xE404749; - constexpr unsigned int mmUMCCH19_PerfMonCtr2_Lo = 0xE404748; - constexpr unsigned int mmUMCCH19_PerfMonCtr3_Hi = 0xE40474C; - constexpr unsigned int mmUMCCH19_PerfMonCtr3_Lo = 0xE40474B; - constexpr unsigned int mmUMCCH19_PerfMonCtr4_Hi = 0xE40474F; - constexpr unsigned int mmUMCCH19_PerfMonCtr4_Lo = 0xE40474E; - constexpr unsigned int mmUMCCH19_PerfMonCtr5_Hi = 0xE404752; - constexpr unsigned int mmUMCCH19_PerfMonCtr5_Lo = 0xE404751; - constexpr unsigned int mmUMCCH19_PerfMonCtrClk_Hi = 0xE404742; - constexpr unsigned int mmUMCCH19_PerfMonCtrClk_Lo = 0xE404741; - constexpr unsigned int mmUMCCH20_PerfMonCtl1 = 0xEC00344; - constexpr unsigned int mmUMCCH20_PerfMonCtl2 = 0xEC00347; - constexpr unsigned int mmUMCCH20_PerfMonCtl3 = 0xEC0034A; - constexpr unsigned int mmUMCCH20_PerfMonCtl4 = 0xEC0034D; - constexpr unsigned int mmUMCCH20_PerfMonCtl5 = 0xEC00350; - constexpr unsigned int mmUMCCH20_PerfMonCtlClk = 0xEC00340; - constexpr unsigned int mmUMCCH20_PerfMonCtr1_Hi = 0xEC00346; - constexpr unsigned int mmUMCCH20_PerfMonCtr1_Lo = 0xEC00345; - constexpr unsigned int mmUMCCH20_PerfMonCtr2_Hi = 0xEC00349; - constexpr unsigned int mmUMCCH20_PerfMonCtr2_Lo = 0xEC00348; - constexpr unsigned int mmUMCCH20_PerfMonCtr3_Hi = 0xEC0034C; - constexpr unsigned int mmUMCCH20_PerfMonCtr3_Lo = 0xEC0034B; - constexpr unsigned int mmUMCCH20_PerfMonCtr4_Hi = 0xEC0034F; - constexpr unsigned int mmUMCCH20_PerfMonCtr4_Lo = 0xEC0034E; - constexpr unsigned int mmUMCCH20_PerfMonCtr5_Hi = 0xEC00352; - constexpr unsigned int mmUMCCH20_PerfMonCtr5_Lo = 0xEC00351; - constexpr unsigned int mmUMCCH20_PerfMonCtrClk_Hi = 0xEC00342; - constexpr unsigned int mmUMCCH20_PerfMonCtrClk_Lo = 0xEC00341; - constexpr unsigned int 
mmUMCCH21_PerfMonCtl1 = 0xEC00744; - constexpr unsigned int mmUMCCH21_PerfMonCtl2 = 0xEC00747; - constexpr unsigned int mmUMCCH21_PerfMonCtl3 = 0xEC0074A; - constexpr unsigned int mmUMCCH21_PerfMonCtl4 = 0xEC0074D; - constexpr unsigned int mmUMCCH21_PerfMonCtl5 = 0xEC00750; - constexpr unsigned int mmUMCCH21_PerfMonCtlClk = 0xEC00740; - constexpr unsigned int mmUMCCH21_PerfMonCtr1_Hi = 0xEC00746; - constexpr unsigned int mmUMCCH21_PerfMonCtr1_Lo = 0xEC00745; - constexpr unsigned int mmUMCCH21_PerfMonCtr2_Hi = 0xEC00749; - constexpr unsigned int mmUMCCH21_PerfMonCtr2_Lo = 0xEC00748; - constexpr unsigned int mmUMCCH21_PerfMonCtr3_Hi = 0xEC0074C; - constexpr unsigned int mmUMCCH21_PerfMonCtr3_Lo = 0xEC0074B; - constexpr unsigned int mmUMCCH21_PerfMonCtr4_Hi = 0xEC0074F; - constexpr unsigned int mmUMCCH21_PerfMonCtr4_Lo = 0xEC0074E; - constexpr unsigned int mmUMCCH21_PerfMonCtr5_Hi = 0xEC00752; - constexpr unsigned int mmUMCCH21_PerfMonCtr5_Lo = 0xEC00751; - constexpr unsigned int mmUMCCH21_PerfMonCtrClk_Hi = 0xEC00742; - constexpr unsigned int mmUMCCH21_PerfMonCtrClk_Lo = 0xEC00741; - constexpr unsigned int mmUMCCH22_PerfMonCtl1 = 0xEC04344; - constexpr unsigned int mmUMCCH22_PerfMonCtl2 = 0xEC04347; - constexpr unsigned int mmUMCCH22_PerfMonCtl3 = 0xEC0434A; - constexpr unsigned int mmUMCCH22_PerfMonCtl4 = 0xEC0434D; - constexpr unsigned int mmUMCCH22_PerfMonCtl5 = 0xEC04350; - constexpr unsigned int mmUMCCH22_PerfMonCtlClk = 0xEC04340; - constexpr unsigned int mmUMCCH22_PerfMonCtr1_Hi = 0xEC04346; - constexpr unsigned int mmUMCCH22_PerfMonCtr1_Lo = 0xEC04345; - constexpr unsigned int mmUMCCH22_PerfMonCtr2_Hi = 0xEC04349; - constexpr unsigned int mmUMCCH22_PerfMonCtr2_Lo = 0xEC04348; - constexpr unsigned int mmUMCCH22_PerfMonCtr3_Hi = 0xEC0434C; - constexpr unsigned int mmUMCCH22_PerfMonCtr3_Lo = 0xEC0434B; - constexpr unsigned int mmUMCCH22_PerfMonCtr4_Hi = 0xEC0434F; - constexpr unsigned int mmUMCCH22_PerfMonCtr4_Lo = 0xEC0434E; - constexpr unsigned int 
mmUMCCH22_PerfMonCtr5_Hi = 0xEC04352; - constexpr unsigned int mmUMCCH22_PerfMonCtr5_Lo = 0xEC04351; - constexpr unsigned int mmUMCCH22_PerfMonCtrClk_Hi = 0xEC04342; - constexpr unsigned int mmUMCCH22_PerfMonCtrClk_Lo = 0xEC04341; - constexpr unsigned int mmUMCCH23_PerfMonCtl1 = 0xEC04744; - constexpr unsigned int mmUMCCH23_PerfMonCtl2 = 0xEC04747; - constexpr unsigned int mmUMCCH23_PerfMonCtl3 = 0xEC0474A; - constexpr unsigned int mmUMCCH23_PerfMonCtl4 = 0xEC0474D; - constexpr unsigned int mmUMCCH23_PerfMonCtl5 = 0xEC04750; - constexpr unsigned int mmUMCCH23_PerfMonCtlClk = 0xEC04740; - constexpr unsigned int mmUMCCH23_PerfMonCtr1_Hi = 0xEC04746; - constexpr unsigned int mmUMCCH23_PerfMonCtr1_Lo = 0xEC04745; - constexpr unsigned int mmUMCCH23_PerfMonCtr2_Hi = 0xEC04749; - constexpr unsigned int mmUMCCH23_PerfMonCtr2_Lo = 0xEC04748; - constexpr unsigned int mmUMCCH23_PerfMonCtr3_Hi = 0xEC0474C; - constexpr unsigned int mmUMCCH23_PerfMonCtr3_Lo = 0xEC0474B; - constexpr unsigned int mmUMCCH23_PerfMonCtr4_Hi = 0xEC0474F; - constexpr unsigned int mmUMCCH23_PerfMonCtr4_Lo = 0xEC0474E; - constexpr unsigned int mmUMCCH23_PerfMonCtr5_Hi = 0xEC04752; - constexpr unsigned int mmUMCCH23_PerfMonCtr5_Lo = 0xEC04751; - constexpr unsigned int mmUMCCH23_PerfMonCtrClk_Hi = 0xEC04742; - constexpr unsigned int mmUMCCH23_PerfMonCtrClk_Lo = 0xEC04741; -} // namespace Nv31 -#endif - -#if CHIP_HDR_NAVI32 -namespace Nv32 -{ - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER0_CFG = 0xDD44; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER1_CFG = 0xDD45; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_HI = 0xD4F5; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_LO = 0xD4F4; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_MODE = 0xDD42; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT = 0xDD40; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT1 = 0xDD41; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_HI = 0xD4F9; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_LO = 
0xD4F8; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD46; - constexpr unsigned int mmGE_FED_STATUS = 0x224A; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_HI = 0xD04C; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_LO = 0xD04B; - constexpr unsigned int mmGRBM_SE3_PERFCOUNTER_SELECT = 0xD845; - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0C80; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0C81; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0C82; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0C83; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0C86; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0C87; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0C84; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0C85; -} // namespace Nv32 -#endif - -#if CHIP_HDR_NAVI33 -namespace Nv33 -{ - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0C80; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0C81; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0C82; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0C83; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0C86; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0C87; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0C84; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0C85; -} // namespace Nv33 -#endif - -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -namespace Nv3x -{ - constexpr unsigned int mmCHCG_PERFCOUNTER0_HI = 0xD3C9; - constexpr unsigned int mmCHCG_PERFCOUNTER0_LO = 0xD3C8; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT = 0xDBC6; - constexpr unsigned int mmCHCG_PERFCOUNTER0_SELECT1 = 0xDBC7; - constexpr unsigned int mmCHCG_PERFCOUNTER1_HI = 0xD3CB; - constexpr unsigned int mmCHCG_PERFCOUNTER1_LO = 0xD3CA; - constexpr unsigned int mmCHCG_PERFCOUNTER1_SELECT = 0xDBC8; - constexpr unsigned int mmCHCG_PERFCOUNTER2_HI = 0xD3CD; - constexpr unsigned int mmCHCG_PERFCOUNTER2_LO = 0xD3CC; - constexpr unsigned int 
mmCHCG_PERFCOUNTER2_SELECT = 0xDBC9; - constexpr unsigned int mmCHCG_PERFCOUNTER3_HI = 0xD3CF; - constexpr unsigned int mmCHCG_PERFCOUNTER3_LO = 0xD3CE; - constexpr unsigned int mmCHCG_PERFCOUNTER3_SELECT = 0xDBCA; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_HI = 0xD04A; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_LO = 0xD049; - constexpr unsigned int mmGRBM_SE2_PERFCOUNTER_SELECT = 0xD844; - constexpr unsigned int mmGUS_PERFCOUNTER0_CFG = 0xDE03; - constexpr unsigned int mmGUS_PERFCOUNTER1_CFG = 0xDE04; - constexpr unsigned int mmGUS_PERFCOUNTER2_HI = 0xD641; - constexpr unsigned int mmGUS_PERFCOUNTER2_LO = 0xD640; - constexpr unsigned int mmGUS_PERFCOUNTER2_MODE = 0xDE02; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT = 0xDE00; - constexpr unsigned int mmGUS_PERFCOUNTER2_SELECT1 = 0xDE01; - constexpr unsigned int mmGUS_PERFCOUNTER_HI = 0xD643; - constexpr unsigned int mmGUS_PERFCOUNTER_LO = 0xD642; - constexpr unsigned int mmGUS_PERFCOUNTER_RSLT_CNTL = 0xDE05; - constexpr unsigned int mmSDMA1_PERFCNT_MISC_CNTL = 0xDE2F; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER0_CFG = 0xDE2C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER1_CFG = 0xDE2D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_HI = 0xD66D; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_LO = 0xD66C; - constexpr unsigned int mmSDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL = 0xDE2E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_HI = 0xD66F; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_LO = 0xD66E; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT = 0xDE30; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT1 = 0xDE31; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_HI = 0xD671; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_LO = 0xD670; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT = 0xDE32; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT1 = 0xDE33; -} // namespace Nv3x -#endif - -namespace Oss50 -{ - constexpr unsigned int mmSDMA0_PERFCOUNTER0_HI = 0x12CB; - 
constexpr unsigned int mmSDMA0_PERFCOUNTER0_LO = 0x12CA; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_RESULT = 0x12B8; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_SELECT = 0x12C8; - constexpr unsigned int mmSDMA0_PERFCOUNTER0_SELECT1 = 0x12C9; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_HI = 0x12CF; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_LO = 0x12CE; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_RESULT = 0x12B9; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_SELECT = 0x12CC; - constexpr unsigned int mmSDMA0_PERFCOUNTER1_SELECT1 = 0x12CD; - constexpr unsigned int mmSDMA0_PERFCOUNTER_TAG_DELAY_RANGE = 0x12BA; - constexpr unsigned int mmSDMA0_PERFMON_CNTL = 0x12B7; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_HI = 0x18CB; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_LO = 0x18CA; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_RESULT = 0x18B8; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT = 0x18C8; - constexpr unsigned int mmSDMA1_PERFCOUNTER0_SELECT1 = 0x18C9; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_HI = 0x18CF; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_LO = 0x18CE; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_RESULT = 0x18B9; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT = 0x18CC; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_SELECT1 = 0x18CD; - constexpr unsigned int mmSDMA1_PERFCOUNTER_TAG_DELAY_RANGE = 0x18BA; - constexpr unsigned int mmSDMA1_PERFMON_CNTL = 0x18B7; -} // namespace Oss50 - -#if CHIP_HDR_PHOENIX1 -namespace Phx1 -{ - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x12400C81; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x12400C83; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x12400C85; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x12400C87; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x12400C89; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x12400C8B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x12400C8D; - constexpr unsigned int 
mmDF_PIE_AON_PerfMonCtlHi7 = 0x12400C8F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x12400C80; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x12400C82; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x12400C84; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x12400C86; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x12400C88; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x12400C8A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x12400C8C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x12400C8E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x12400CC1; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x12400CC3; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x12400CC5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x12400CC7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x12400CC9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x12400CCB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x12400CCD; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x12400CCF; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x12400CC0; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x12400CC2; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x12400CC4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x12400CC6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x12400CC8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x12400CCA; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x12400CCC; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x12400CCE; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; -} // namespace Phx1 -#endif - -namespace Raphael -{ - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x0001; - constexpr 
unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi8 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi9 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi10 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi11 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi12 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi13 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi14 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi15 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo8 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo9 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo10 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo11 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo12 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo13 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo14 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo15 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x0001; - constexpr 
unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi8 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi9 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi10 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi11 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi12 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi13 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi14 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi15 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo8 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo9 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo10 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo11 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo12 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo13 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo14 = 0x0001; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo15 = 0x0001; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER0_CFG = 0xDD20; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER1_CFG = 0xDD21; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_HI = 0xD4FD; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_LO = 0xD4FC; - 
constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_MODE = 0xDD4E; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT = 0xDD4C; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT1 = 0xDD4D; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_HI = 0xD4E1; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_LO = 0xD4E0; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD22; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA5; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA4; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0xC440; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0xC442; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14B41; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14B42; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14B43; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14B44; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14B45; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14B40; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x14B4B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x14B4A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x14B4D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x14B4C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x14B4F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x14B4E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14B51; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14B50; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14B53; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14B52; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14B49; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14B48; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54B41; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54B42; - constexpr unsigned int 
mmUMCCH3_PerfMonCtl3 = 0x54B43; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54B44; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54B45; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54B40; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x54B4B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x54B4A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x54B4D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x54B4C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x54B4F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x54B4E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54B51; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54B50; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54B53; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54B52; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54B49; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54B48; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - 
constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94B41; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94B42; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94B43; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94B44; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94B45; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94B40; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x94B4B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x94B4A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x94B4D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x94B4C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x94B4F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x94B4E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94B51; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94B50; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94B53; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94B52; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94B49; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94B48; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo 
= 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4B41; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4B42; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4B43; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4B44; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4B45; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4B40; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD4B4B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD4B4A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD4B4D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD4B4C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD4B4F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD4B4E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4B51; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4B50; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4B53; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4B52; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4B49; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4B48; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr 
unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114B41; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114B42; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114B43; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114B44; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114B45; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114B40; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x114B4B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x114B4A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x114B4D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x114B4C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x114B4F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x114B4E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114B51; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114B50; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114B53; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114B52; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114B49; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114B48; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr 
unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154B41; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154B42; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154B43; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154B44; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154B45; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x154B40; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x154B4B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x154B4A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x154B4D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x154B4C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x154B4F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x154B4E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154B51; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154B50; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154B53; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154B52; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154B49; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154B48; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0x194341; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0x194342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 
0x19434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0x194B41; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0x194B42; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0x194B43; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0x194B44; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0x194B45; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0x194B40; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0x194B4B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0x194B4A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0x194B4D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0x194B4C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0x194B4F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0x194B4E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0x194B51; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0x194B50; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0x194B53; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0x194B52; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0x194B49; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0x194B48; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0x1D4343; - constexpr unsigned 
int mmUMCCH14_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0x1D4348; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0x1D4B41; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0x1D4B42; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0x1D4B43; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0x1D4B44; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0x1D4B45; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0x1D4B40; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0x1D4B4B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0x1D4B4A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0x1D4B4D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0x1D4B4C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0x1D4B4F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0x1D4B4E; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 0x1D4B51; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0x1D4B50; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0x1D4B53; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0x1D4B52; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0x1D4B49; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 
0x1D4B48; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0xC250; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0xC261; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0xC24E; -} // namespace Raphael - -namespace Raven -{ - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x54341; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x54342; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x54343; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x54344; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x54345; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x54340; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x5434B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x5434A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x5434D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x5434C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x5434F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x5434E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x54351; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x54350; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x54353; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x54352; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x54349; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x54348; -} // namespace Raven - -namespace Rembrandt -{ - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 
0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER0_CFG = 0xDD20; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER1_CFG = 0xDD21; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_HI = 0xD4FD; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_LO = 0xD4FC; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_MODE = 0xDD4E; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT = 0xDD4C; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER2_SELECT1 = 0xDD4D; - constexpr unsigned int 
mmGC_ATC_L2_PERFCOUNTER_HI = 0xD4E1; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_LO = 0xD4E0; - constexpr unsigned int mmGC_ATC_L2_PERFCOUNTER_RSLT_CNTL = 0xDD22; - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_HIGHCOUNT = 0xDCA5; - constexpr unsigned int mmRLC_SPM_GFXCLOCK_LOWCOUNT = 0xDCA4; - constexpr unsigned int mmSPI_CONFIG_CNTL = 0xC440; - constexpr unsigned int mmSPI_CONFIG_CNTL_1 = 0xC441; - constexpr unsigned int mmSPI_CONFIG_CNTL_2 = 0xC442; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_2 = 0x31DE; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_3 = 0x31DF; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_4 = 0x31E0; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_CU_5 = 0x31E1; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_2 = 0x31E8; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_3 = 0x31E9; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_4 = 0x31EA; - constexpr unsigned int mmSPI_RESOURCE_RESERVE_EN_CU_5 = 0x31EB; - constexpr unsigned int mmSPI_WAVE_LIMIT_CNTL = 0xC443; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14B41; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14B42; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14B43; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14B44; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14B45; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14B40; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x14B4B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x14B4A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x14B4D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x14B4C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x14B4F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x14B4E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14B51; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14B50; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14B53; - constexpr unsigned int 
mmUMCCH1_PerfMonCtr5_Lo = 0x14B52; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14B49; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14B48; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x54B41; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x54B42; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x54B43; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x54B44; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x54B45; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x54B40; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x54B4B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x54B4A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x54B4D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x54B4C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x54B4F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x54B4E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x54B51; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x54B50; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x54B53; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x54B52; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x54B49; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x54B48; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x94351; - 
constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x94B41; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x94B42; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x94B43; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x94B44; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x94B45; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x94B40; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x94B4B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x94B4A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x94B4D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x94B4C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x94B4F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x94B4E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x94B51; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x94B50; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x94B53; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x94B52; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x94B49; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x94B48; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int 
mmUMCCH6_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0xD4B41; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0xD4B42; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0xD4B43; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0xD4B44; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0xD4B45; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0xD4B40; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0xD4B4B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0xD4B4A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0xD4B4D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0xD4B4C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0xD4B4F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0xD4B4E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0xD4B51; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0xD4B50; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0xD4B53; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0xD4B52; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0xD4B49; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0xD4B48; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x11434A; 
- constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x114B41; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x114B42; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x114B43; - constexpr unsigned int mmUMCCH9_PerfMonCtl4 = 0x114B44; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x114B45; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x114B40; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x114B4B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x114B4A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x114B4D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x114B4C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x114B4F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x114B4E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x114B51; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x114B50; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x114B53; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x114B52; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x114B49; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x114B48; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x154345; - 
constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH11_PerfMonCtl1 = 0x154B41; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x154B42; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x154B43; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x154B44; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x154B45; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x154B40; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x154B4B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x154B4A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x154B4D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x154B4C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x154B4F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x154B4E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x154B51; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x154B50; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x154B53; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x154B52; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x154B49; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x154B48; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0x194341; - constexpr unsigned int 
mmUMCCH12_PerfMonCtl2 = 0x194342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0x19434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0x194B41; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0x194B42; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0x194B43; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0x194B44; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0x194B45; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0x194B40; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0x194B4B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0x194B4A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0x194B4D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0x194B4C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0x194B4F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0x194B4E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0x194B51; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0x194B50; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0x194B53; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0x194B52; - 
constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0x194B49; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0x194B48; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0x1D4343; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0x1D4348; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0x1D4B41; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0x1D4B42; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0x1D4B43; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0x1D4B44; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0x1D4B45; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0x1D4B40; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0x1D4B4B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0x1D4B4A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0x1D4B4D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0x1D4B4C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0x1D4B4F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0x1D4B4E; - constexpr unsigned int 
mmUMCCH15_PerfMonCtr4_Hi = 0x1D4B51; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0x1D4B50; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0x1D4B53; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0x1D4B52; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0x1D4B49; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0x1D4B48; - constexpr unsigned int mmVGT_ESGS_RING_SIZE = 0xC240; - constexpr unsigned int mmVGT_GSVS_RING_SIZE = 0xC241; - constexpr unsigned int mmVGT_HS_OFFCHIP_PARAM = 0xC24F; - constexpr unsigned int mmVGT_TF_MEMORY_BASE = 0xC250; - constexpr unsigned int mmVGT_TF_MEMORY_BASE_HI = 0xC261; - constexpr unsigned int mmVGT_TF_RING_SIZE = 0xC24E; -} // namespace Rembrandt - -namespace Rn -{ - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0D04; - constexpr unsigned int mmRPB_PERFCOUNTER1_CFG = 0x0D05; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0D06; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0D07; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0D03; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0D02; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0D08; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0CFB; -} // namespace Rn - -namespace Rv2x_Rn -{ - constexpr unsigned int mmRLC_SPM_ACCUM_CTRL = 0xDCAA; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_ADDR = 0xDCA7; - constexpr unsigned int mmRLC_SPM_ACCUM_CTRLRAM_DATA = 0xDCA8; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_ADDR = 0xDCA5; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_DATA = 0xDCA6; - constexpr unsigned int mmRLC_SPM_ACCUM_DATARAM_WRCOUNT = 0xDCAE; - constexpr unsigned int mmRLC_SPM_ACCUM_MODE = 0xDCAB; - constexpr unsigned int mmRLC_SPM_ACCUM_SAMPLES_REQUESTED = 0xDCAD; - constexpr unsigned int mmRLC_SPM_ACCUM_STATUS = 0xDCA9; - constexpr unsigned int mmRLC_SPM_ACCUM_THRESHOLD = 0xDCAC; - constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_VS = 0x2C45; -} // namespace Rv2x_Rn - -namespace Vega -{ - constexpr unsigned int 
mmSDMA1_PERFCOUNTER0_RESULT = 0x14B8; - constexpr unsigned int mmSDMA1_PERFCOUNTER1_RESULT = 0x14B9; - constexpr unsigned int mmSDMA1_PERFCOUNTER_TAG_DELAY_RANGE = 0x14BA; - constexpr unsigned int mmSDMA1_PERFMON_CNTL = 0x14B7; - constexpr unsigned int mmUMCCH1_PerfMonCtl1 = 0x14B41; - constexpr unsigned int mmUMCCH1_PerfMonCtl2 = 0x14B42; - constexpr unsigned int mmUMCCH1_PerfMonCtl3 = 0x14B43; - constexpr unsigned int mmUMCCH1_PerfMonCtl4 = 0x14B44; - constexpr unsigned int mmUMCCH1_PerfMonCtl5 = 0x14B45; - constexpr unsigned int mmUMCCH1_PerfMonCtlClk = 0x14B40; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Hi = 0x14B4B; - constexpr unsigned int mmUMCCH1_PerfMonCtr1_Lo = 0x14B4A; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Hi = 0x14B4D; - constexpr unsigned int mmUMCCH1_PerfMonCtr2_Lo = 0x14B4C; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Hi = 0x14B4F; - constexpr unsigned int mmUMCCH1_PerfMonCtr3_Lo = 0x14B4E; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Hi = 0x14B51; - constexpr unsigned int mmUMCCH1_PerfMonCtr4_Lo = 0x14B50; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Hi = 0x14B53; - constexpr unsigned int mmUMCCH1_PerfMonCtr5_Lo = 0x14B52; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Hi = 0x14B49; - constexpr unsigned int mmUMCCH1_PerfMonCtrClk_Lo = 0x14B48; - constexpr unsigned int mmUMCCH2_PerfMonCtl1 = 0x15341; - constexpr unsigned int mmUMCCH2_PerfMonCtl2 = 0x15342; - constexpr unsigned int mmUMCCH2_PerfMonCtl3 = 0x15343; - constexpr unsigned int mmUMCCH2_PerfMonCtl4 = 0x15344; - constexpr unsigned int mmUMCCH2_PerfMonCtl5 = 0x15345; - constexpr unsigned int mmUMCCH2_PerfMonCtlClk = 0x15340; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Hi = 0x1534B; - constexpr unsigned int mmUMCCH2_PerfMonCtr1_Lo = 0x1534A; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Hi = 0x1534D; - constexpr unsigned int mmUMCCH2_PerfMonCtr2_Lo = 0x1534C; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Hi = 0x1534F; - constexpr unsigned int mmUMCCH2_PerfMonCtr3_Lo = 0x1534E; 
- constexpr unsigned int mmUMCCH2_PerfMonCtr4_Hi = 0x15351; - constexpr unsigned int mmUMCCH2_PerfMonCtr4_Lo = 0x15350; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Hi = 0x15353; - constexpr unsigned int mmUMCCH2_PerfMonCtr5_Lo = 0x15352; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Hi = 0x15349; - constexpr unsigned int mmUMCCH2_PerfMonCtrClk_Lo = 0x15348; - constexpr unsigned int mmUMCCH3_PerfMonCtl1 = 0x15B41; - constexpr unsigned int mmUMCCH3_PerfMonCtl2 = 0x15B42; - constexpr unsigned int mmUMCCH3_PerfMonCtl3 = 0x15B43; - constexpr unsigned int mmUMCCH3_PerfMonCtl4 = 0x15B44; - constexpr unsigned int mmUMCCH3_PerfMonCtl5 = 0x15B45; - constexpr unsigned int mmUMCCH3_PerfMonCtlClk = 0x15B40; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Hi = 0x15B4B; - constexpr unsigned int mmUMCCH3_PerfMonCtr1_Lo = 0x15B4A; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Hi = 0x15B4D; - constexpr unsigned int mmUMCCH3_PerfMonCtr2_Lo = 0x15B4C; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Hi = 0x15B4F; - constexpr unsigned int mmUMCCH3_PerfMonCtr3_Lo = 0x15B4E; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Hi = 0x15B51; - constexpr unsigned int mmUMCCH3_PerfMonCtr4_Lo = 0x15B50; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Hi = 0x15B53; - constexpr unsigned int mmUMCCH3_PerfMonCtr5_Lo = 0x15B52; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Hi = 0x15B49; - constexpr unsigned int mmUMCCH3_PerfMonCtrClk_Lo = 0x15B48; - constexpr unsigned int mmUMCCH4_PerfMonCtl1 = 0x54341; - constexpr unsigned int mmUMCCH4_PerfMonCtl2 = 0x54342; - constexpr unsigned int mmUMCCH4_PerfMonCtl3 = 0x54343; - constexpr unsigned int mmUMCCH4_PerfMonCtl4 = 0x54344; - constexpr unsigned int mmUMCCH4_PerfMonCtl5 = 0x54345; - constexpr unsigned int mmUMCCH4_PerfMonCtlClk = 0x54340; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Hi = 0x5434B; - constexpr unsigned int mmUMCCH4_PerfMonCtr1_Lo = 0x5434A; - constexpr unsigned int mmUMCCH4_PerfMonCtr2_Hi = 0x5434D; - constexpr unsigned int 
mmUMCCH4_PerfMonCtr2_Lo = 0x5434C; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Hi = 0x5434F; - constexpr unsigned int mmUMCCH4_PerfMonCtr3_Lo = 0x5434E; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Hi = 0x54351; - constexpr unsigned int mmUMCCH4_PerfMonCtr4_Lo = 0x54350; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Hi = 0x54353; - constexpr unsigned int mmUMCCH4_PerfMonCtr5_Lo = 0x54352; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Hi = 0x54349; - constexpr unsigned int mmUMCCH4_PerfMonCtrClk_Lo = 0x54348; - constexpr unsigned int mmUMCCH5_PerfMonCtl1 = 0x54B41; - constexpr unsigned int mmUMCCH5_PerfMonCtl2 = 0x54B42; - constexpr unsigned int mmUMCCH5_PerfMonCtl3 = 0x54B43; - constexpr unsigned int mmUMCCH5_PerfMonCtl4 = 0x54B44; - constexpr unsigned int mmUMCCH5_PerfMonCtl5 = 0x54B45; - constexpr unsigned int mmUMCCH5_PerfMonCtlClk = 0x54B40; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Hi = 0x54B4B; - constexpr unsigned int mmUMCCH5_PerfMonCtr1_Lo = 0x54B4A; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Hi = 0x54B4D; - constexpr unsigned int mmUMCCH5_PerfMonCtr2_Lo = 0x54B4C; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Hi = 0x54B4F; - constexpr unsigned int mmUMCCH5_PerfMonCtr3_Lo = 0x54B4E; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Hi = 0x54B51; - constexpr unsigned int mmUMCCH5_PerfMonCtr4_Lo = 0x54B50; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Hi = 0x54B53; - constexpr unsigned int mmUMCCH5_PerfMonCtr5_Lo = 0x54B52; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Hi = 0x54B49; - constexpr unsigned int mmUMCCH5_PerfMonCtrClk_Lo = 0x54B48; - constexpr unsigned int mmUMCCH6_PerfMonCtl1 = 0x55341; - constexpr unsigned int mmUMCCH6_PerfMonCtl2 = 0x55342; - constexpr unsigned int mmUMCCH6_PerfMonCtl3 = 0x55343; - constexpr unsigned int mmUMCCH6_PerfMonCtl4 = 0x55344; - constexpr unsigned int mmUMCCH6_PerfMonCtl5 = 0x55345; - constexpr unsigned int mmUMCCH6_PerfMonCtlClk = 0x55340; - constexpr unsigned int mmUMCCH6_PerfMonCtr1_Hi = 0x5534B; - 
constexpr unsigned int mmUMCCH6_PerfMonCtr1_Lo = 0x5534A; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Hi = 0x5534D; - constexpr unsigned int mmUMCCH6_PerfMonCtr2_Lo = 0x5534C; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Hi = 0x5534F; - constexpr unsigned int mmUMCCH6_PerfMonCtr3_Lo = 0x5534E; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Hi = 0x55351; - constexpr unsigned int mmUMCCH6_PerfMonCtr4_Lo = 0x55350; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Hi = 0x55353; - constexpr unsigned int mmUMCCH6_PerfMonCtr5_Lo = 0x55352; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Hi = 0x55349; - constexpr unsigned int mmUMCCH6_PerfMonCtrClk_Lo = 0x55348; - constexpr unsigned int mmUMCCH7_PerfMonCtl1 = 0x55B41; - constexpr unsigned int mmUMCCH7_PerfMonCtl2 = 0x55B42; - constexpr unsigned int mmUMCCH7_PerfMonCtl3 = 0x55B43; - constexpr unsigned int mmUMCCH7_PerfMonCtl4 = 0x55B44; - constexpr unsigned int mmUMCCH7_PerfMonCtl5 = 0x55B45; - constexpr unsigned int mmUMCCH7_PerfMonCtlClk = 0x55B40; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Hi = 0x55B4B; - constexpr unsigned int mmUMCCH7_PerfMonCtr1_Lo = 0x55B4A; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Hi = 0x55B4D; - constexpr unsigned int mmUMCCH7_PerfMonCtr2_Lo = 0x55B4C; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Hi = 0x55B4F; - constexpr unsigned int mmUMCCH7_PerfMonCtr3_Lo = 0x55B4E; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Hi = 0x55B51; - constexpr unsigned int mmUMCCH7_PerfMonCtr4_Lo = 0x55B50; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Hi = 0x55B53; - constexpr unsigned int mmUMCCH7_PerfMonCtr5_Lo = 0x55B52; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Hi = 0x55B49; - constexpr unsigned int mmUMCCH7_PerfMonCtrClk_Lo = 0x55B48; -} // namespace Vega - -namespace Vg10_Vg12_Rn -{ - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C4; -} // namespace Vg10_Vg12_Rn - -namespace Vg10_Vg12_Vg20_Rv1x_Rv2x -{ - constexpr unsigned int mmRPB_PERFCOUNTER0_CFG = 0x0D03; - constexpr unsigned int 
mmRPB_PERFCOUNTER1_CFG = 0x0D04; - constexpr unsigned int mmRPB_PERFCOUNTER2_CFG = 0x0D05; - constexpr unsigned int mmRPB_PERFCOUNTER3_CFG = 0x0D06; - constexpr unsigned int mmRPB_PERFCOUNTER_HI = 0x0D02; - constexpr unsigned int mmRPB_PERFCOUNTER_LO = 0x0D01; - constexpr unsigned int mmRPB_PERFCOUNTER_RSLT_CNTL = 0x0D07; - constexpr unsigned int mmRPB_PERF_COUNTER_CNTL = 0x0CFA; -} // namespace Vg10_Vg12_Vg20_Rv1x_Rv2x - -namespace Vg10_Vg20 -{ - constexpr unsigned int mmRLC_SPM_CBR0_PERFMON_SAMPLE_DELAY = 0xDCA1; - constexpr unsigned int mmRLC_SPM_CBR1_PERFMON_SAMPLE_DELAY = 0xDCA2; - constexpr unsigned int mmRLC_SPM_DBR0_PERFMON_SAMPLE_DELAY = 0xDC9F; - constexpr unsigned int mmRLC_SPM_DBR1_PERFMON_SAMPLE_DELAY = 0xDCA0; - constexpr unsigned int mmUMCCH8_PerfMonCtl1 = 0x94341; - constexpr unsigned int mmUMCCH8_PerfMonCtl2 = 0x94342; - constexpr unsigned int mmUMCCH8_PerfMonCtl3 = 0x94343; - constexpr unsigned int mmUMCCH8_PerfMonCtl4 = 0x94344; - constexpr unsigned int mmUMCCH8_PerfMonCtl5 = 0x94345; - constexpr unsigned int mmUMCCH8_PerfMonCtlClk = 0x94340; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Hi = 0x9434B; - constexpr unsigned int mmUMCCH8_PerfMonCtr1_Lo = 0x9434A; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Hi = 0x9434D; - constexpr unsigned int mmUMCCH8_PerfMonCtr2_Lo = 0x9434C; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Hi = 0x9434F; - constexpr unsigned int mmUMCCH8_PerfMonCtr3_Lo = 0x9434E; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Hi = 0x94351; - constexpr unsigned int mmUMCCH8_PerfMonCtr4_Lo = 0x94350; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Hi = 0x94353; - constexpr unsigned int mmUMCCH8_PerfMonCtr5_Lo = 0x94352; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Hi = 0x94349; - constexpr unsigned int mmUMCCH8_PerfMonCtrClk_Lo = 0x94348; - constexpr unsigned int mmUMCCH9_PerfMonCtl1 = 0x94B41; - constexpr unsigned int mmUMCCH9_PerfMonCtl2 = 0x94B42; - constexpr unsigned int mmUMCCH9_PerfMonCtl3 = 0x94B43; - constexpr unsigned int 
mmUMCCH9_PerfMonCtl4 = 0x94B44; - constexpr unsigned int mmUMCCH9_PerfMonCtl5 = 0x94B45; - constexpr unsigned int mmUMCCH9_PerfMonCtlClk = 0x94B40; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Hi = 0x94B4B; - constexpr unsigned int mmUMCCH9_PerfMonCtr1_Lo = 0x94B4A; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Hi = 0x94B4D; - constexpr unsigned int mmUMCCH9_PerfMonCtr2_Lo = 0x94B4C; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Hi = 0x94B4F; - constexpr unsigned int mmUMCCH9_PerfMonCtr3_Lo = 0x94B4E; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Hi = 0x94B51; - constexpr unsigned int mmUMCCH9_PerfMonCtr4_Lo = 0x94B50; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Hi = 0x94B53; - constexpr unsigned int mmUMCCH9_PerfMonCtr5_Lo = 0x94B52; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Hi = 0x94B49; - constexpr unsigned int mmUMCCH9_PerfMonCtrClk_Lo = 0x94B48; - constexpr unsigned int mmUMCCH10_PerfMonCtl1 = 0x95341; - constexpr unsigned int mmUMCCH10_PerfMonCtl2 = 0x95342; - constexpr unsigned int mmUMCCH10_PerfMonCtl3 = 0x95343; - constexpr unsigned int mmUMCCH10_PerfMonCtl4 = 0x95344; - constexpr unsigned int mmUMCCH10_PerfMonCtl5 = 0x95345; - constexpr unsigned int mmUMCCH10_PerfMonCtlClk = 0x95340; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Hi = 0x9534B; - constexpr unsigned int mmUMCCH10_PerfMonCtr1_Lo = 0x9534A; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Hi = 0x9534D; - constexpr unsigned int mmUMCCH10_PerfMonCtr2_Lo = 0x9534C; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Hi = 0x9534F; - constexpr unsigned int mmUMCCH10_PerfMonCtr3_Lo = 0x9534E; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Hi = 0x95351; - constexpr unsigned int mmUMCCH10_PerfMonCtr4_Lo = 0x95350; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Hi = 0x95353; - constexpr unsigned int mmUMCCH10_PerfMonCtr5_Lo = 0x95352; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Hi = 0x95349; - constexpr unsigned int mmUMCCH10_PerfMonCtrClk_Lo = 0x95348; - constexpr unsigned int 
mmUMCCH11_PerfMonCtl1 = 0x95B41; - constexpr unsigned int mmUMCCH11_PerfMonCtl2 = 0x95B42; - constexpr unsigned int mmUMCCH11_PerfMonCtl3 = 0x95B43; - constexpr unsigned int mmUMCCH11_PerfMonCtl4 = 0x95B44; - constexpr unsigned int mmUMCCH11_PerfMonCtl5 = 0x95B45; - constexpr unsigned int mmUMCCH11_PerfMonCtlClk = 0x95B40; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Hi = 0x95B4B; - constexpr unsigned int mmUMCCH11_PerfMonCtr1_Lo = 0x95B4A; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Hi = 0x95B4D; - constexpr unsigned int mmUMCCH11_PerfMonCtr2_Lo = 0x95B4C; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Hi = 0x95B4F; - constexpr unsigned int mmUMCCH11_PerfMonCtr3_Lo = 0x95B4E; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Hi = 0x95B51; - constexpr unsigned int mmUMCCH11_PerfMonCtr4_Lo = 0x95B50; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Hi = 0x95B53; - constexpr unsigned int mmUMCCH11_PerfMonCtr5_Lo = 0x95B52; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Hi = 0x95B49; - constexpr unsigned int mmUMCCH11_PerfMonCtrClk_Lo = 0x95B48; - constexpr unsigned int mmUMCCH12_PerfMonCtl1 = 0xD4341; - constexpr unsigned int mmUMCCH12_PerfMonCtl2 = 0xD4342; - constexpr unsigned int mmUMCCH12_PerfMonCtl3 = 0xD4343; - constexpr unsigned int mmUMCCH12_PerfMonCtl4 = 0xD4344; - constexpr unsigned int mmUMCCH12_PerfMonCtl5 = 0xD4345; - constexpr unsigned int mmUMCCH12_PerfMonCtlClk = 0xD4340; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Hi = 0xD434B; - constexpr unsigned int mmUMCCH12_PerfMonCtr1_Lo = 0xD434A; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Hi = 0xD434D; - constexpr unsigned int mmUMCCH12_PerfMonCtr2_Lo = 0xD434C; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Hi = 0xD434F; - constexpr unsigned int mmUMCCH12_PerfMonCtr3_Lo = 0xD434E; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Hi = 0xD4351; - constexpr unsigned int mmUMCCH12_PerfMonCtr4_Lo = 0xD4350; - constexpr unsigned int mmUMCCH12_PerfMonCtr5_Hi = 0xD4353; - constexpr unsigned int 
mmUMCCH12_PerfMonCtr5_Lo = 0xD4352; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Hi = 0xD4349; - constexpr unsigned int mmUMCCH12_PerfMonCtrClk_Lo = 0xD4348; - constexpr unsigned int mmUMCCH13_PerfMonCtl1 = 0xD4B41; - constexpr unsigned int mmUMCCH13_PerfMonCtl2 = 0xD4B42; - constexpr unsigned int mmUMCCH13_PerfMonCtl3 = 0xD4B43; - constexpr unsigned int mmUMCCH13_PerfMonCtl4 = 0xD4B44; - constexpr unsigned int mmUMCCH13_PerfMonCtl5 = 0xD4B45; - constexpr unsigned int mmUMCCH13_PerfMonCtlClk = 0xD4B40; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Hi = 0xD4B4B; - constexpr unsigned int mmUMCCH13_PerfMonCtr1_Lo = 0xD4B4A; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Hi = 0xD4B4D; - constexpr unsigned int mmUMCCH13_PerfMonCtr2_Lo = 0xD4B4C; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Hi = 0xD4B4F; - constexpr unsigned int mmUMCCH13_PerfMonCtr3_Lo = 0xD4B4E; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Hi = 0xD4B51; - constexpr unsigned int mmUMCCH13_PerfMonCtr4_Lo = 0xD4B50; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Hi = 0xD4B53; - constexpr unsigned int mmUMCCH13_PerfMonCtr5_Lo = 0xD4B52; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Hi = 0xD4B49; - constexpr unsigned int mmUMCCH13_PerfMonCtrClk_Lo = 0xD4B48; - constexpr unsigned int mmUMCCH14_PerfMonCtl1 = 0xD5341; - constexpr unsigned int mmUMCCH14_PerfMonCtl2 = 0xD5342; - constexpr unsigned int mmUMCCH14_PerfMonCtl3 = 0xD5343; - constexpr unsigned int mmUMCCH14_PerfMonCtl4 = 0xD5344; - constexpr unsigned int mmUMCCH14_PerfMonCtl5 = 0xD5345; - constexpr unsigned int mmUMCCH14_PerfMonCtlClk = 0xD5340; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Hi = 0xD534B; - constexpr unsigned int mmUMCCH14_PerfMonCtr1_Lo = 0xD534A; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Hi = 0xD534D; - constexpr unsigned int mmUMCCH14_PerfMonCtr2_Lo = 0xD534C; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Hi = 0xD534F; - constexpr unsigned int mmUMCCH14_PerfMonCtr3_Lo = 0xD534E; - constexpr unsigned int 
mmUMCCH14_PerfMonCtr4_Hi = 0xD5351; - constexpr unsigned int mmUMCCH14_PerfMonCtr4_Lo = 0xD5350; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Hi = 0xD5353; - constexpr unsigned int mmUMCCH14_PerfMonCtr5_Lo = 0xD5352; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Hi = 0xD5349; - constexpr unsigned int mmUMCCH14_PerfMonCtrClk_Lo = 0xD5348; - constexpr unsigned int mmUMCCH15_PerfMonCtl1 = 0xD5B41; - constexpr unsigned int mmUMCCH15_PerfMonCtl2 = 0xD5B42; - constexpr unsigned int mmUMCCH15_PerfMonCtl3 = 0xD5B43; - constexpr unsigned int mmUMCCH15_PerfMonCtl4 = 0xD5B44; - constexpr unsigned int mmUMCCH15_PerfMonCtl5 = 0xD5B45; - constexpr unsigned int mmUMCCH15_PerfMonCtlClk = 0xD5B40; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Hi = 0xD5B4B; - constexpr unsigned int mmUMCCH15_PerfMonCtr1_Lo = 0xD5B4A; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Hi = 0xD5B4D; - constexpr unsigned int mmUMCCH15_PerfMonCtr2_Lo = 0xD5B4C; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Hi = 0xD5B4F; - constexpr unsigned int mmUMCCH15_PerfMonCtr3_Lo = 0xD5B4E; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Hi = 0xD5B51; - constexpr unsigned int mmUMCCH15_PerfMonCtr4_Lo = 0xD5B50; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Hi = 0xD5B53; - constexpr unsigned int mmUMCCH15_PerfMonCtr5_Lo = 0xD5B52; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Hi = 0xD5B49; - constexpr unsigned int mmUMCCH15_PerfMonCtrClk_Lo = 0xD5B48; -} // namespace Vg10_Vg20 - -namespace Vg12 -{ - constexpr unsigned int mmXDMA_SLV_FLIP_PENDING = 0x348C; -} // namespace Vg12 - -namespace Vg12_Rn -{ - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi0 = 0x7511; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi1 = 0x7515; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi2 = 0x7519; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi3 = 0x751D; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi4 = 0x7621; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi5 = 0x7623; - constexpr unsigned int 
mmDF_PIE_AON_PerfMonCtlHi6 = 0x7625; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlHi7 = 0x7627; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo0 = 0x7510; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo1 = 0x7514; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo2 = 0x7518; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo3 = 0x751C; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo4 = 0x7620; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo5 = 0x7622; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo6 = 0x7624; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtlLo7 = 0x7626; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi0 = 0x7513; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi1 = 0x7517; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi2 = 0x751B; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi3 = 0x751F; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi4 = 0x75E5; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi5 = 0x75E7; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi6 = 0x75E9; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrHi7 = 0x75EB; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo0 = 0x7512; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo1 = 0x7516; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo2 = 0x751A; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo3 = 0x751E; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo4 = 0x75E4; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo5 = 0x75E6; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo6 = 0x75E8; - constexpr unsigned int mmDF_PIE_AON_PerfMonCtrLo7 = 0x75EA; - constexpr unsigned int mmPerfMonCtl1 = 0x14341; - constexpr unsigned int mmPerfMonCtl2 = 0x14342; - constexpr unsigned int mmPerfMonCtl3 = 0x14343; - constexpr unsigned int mmPerfMonCtl4 = 0x14344; - constexpr unsigned int mmPerfMonCtl5 = 0x14345; - constexpr unsigned int mmPerfMonCtlClk = 0x14340; - constexpr unsigned int mmPerfMonCtr1_Hi = 0x1434B; - constexpr unsigned int mmPerfMonCtr1_Lo = 0x1434A; - 
constexpr unsigned int mmPerfMonCtr2_Hi = 0x1434D; - constexpr unsigned int mmPerfMonCtr2_Lo = 0x1434C; - constexpr unsigned int mmPerfMonCtr3_Hi = 0x1434F; - constexpr unsigned int mmPerfMonCtr3_Lo = 0x1434E; - constexpr unsigned int mmPerfMonCtr4_Hi = 0x14351; - constexpr unsigned int mmPerfMonCtr4_Lo = 0x14350; - constexpr unsigned int mmPerfMonCtr5_Hi = 0x14353; - constexpr unsigned int mmPerfMonCtr5_Lo = 0x14352; - constexpr unsigned int mmPerfMonCtrClk_Hi = 0x14349; - constexpr unsigned int mmPerfMonCtrClk_Lo = 0x14348; -} // namespace Vg12_Rn - -namespace Vg12_Vg20 -{ - constexpr unsigned int mmPA_STATE_STEREO_X = 0xC2B5; - constexpr unsigned int mmPA_STEREO_CNTL = 0xA210; -} // namespace Vg12_Vg20 - -namespace Vg12_Vg20_Rn -{ - constexpr unsigned int mmPA_SC_ENHANCE_2 = 0x22DC; -} // namespace Vg12_Vg20_Rn - -namespace Vg20 -{ - constexpr unsigned int mmTA_DSM_CNTL = 0x2584; - constexpr unsigned int mmTA_DSM_CNTL2 = 0x2585; - constexpr unsigned int mmTA_EDC_CNT = 0x2586; - constexpr unsigned int mmUMCCH16_PerfMonCtl1 = 0x114341; - constexpr unsigned int mmUMCCH16_PerfMonCtl2 = 0x114342; - constexpr unsigned int mmUMCCH16_PerfMonCtl3 = 0x114343; - constexpr unsigned int mmUMCCH16_PerfMonCtl4 = 0x114344; - constexpr unsigned int mmUMCCH16_PerfMonCtl5 = 0x114345; - constexpr unsigned int mmUMCCH16_PerfMonCtlClk = 0x114340; - constexpr unsigned int mmUMCCH16_PerfMonCtr1_Hi = 0x11434B; - constexpr unsigned int mmUMCCH16_PerfMonCtr1_Lo = 0x11434A; - constexpr unsigned int mmUMCCH16_PerfMonCtr2_Hi = 0x11434D; - constexpr unsigned int mmUMCCH16_PerfMonCtr2_Lo = 0x11434C; - constexpr unsigned int mmUMCCH16_PerfMonCtr3_Hi = 0x11434F; - constexpr unsigned int mmUMCCH16_PerfMonCtr3_Lo = 0x11434E; - constexpr unsigned int mmUMCCH16_PerfMonCtr4_Hi = 0x114351; - constexpr unsigned int mmUMCCH16_PerfMonCtr4_Lo = 0x114350; - constexpr unsigned int mmUMCCH16_PerfMonCtr5_Hi = 0x114353; - constexpr unsigned int mmUMCCH16_PerfMonCtr5_Lo = 0x114352; - constexpr unsigned int 
mmUMCCH16_PerfMonCtrClk_Hi = 0x114349; - constexpr unsigned int mmUMCCH16_PerfMonCtrClk_Lo = 0x114348; - constexpr unsigned int mmUMCCH17_PerfMonCtl1 = 0x114B41; - constexpr unsigned int mmUMCCH17_PerfMonCtl2 = 0x114B42; - constexpr unsigned int mmUMCCH17_PerfMonCtl3 = 0x114B43; - constexpr unsigned int mmUMCCH17_PerfMonCtl4 = 0x114B44; - constexpr unsigned int mmUMCCH17_PerfMonCtl5 = 0x114B45; - constexpr unsigned int mmUMCCH17_PerfMonCtlClk = 0x114B40; - constexpr unsigned int mmUMCCH17_PerfMonCtr1_Hi = 0x114B4B; - constexpr unsigned int mmUMCCH17_PerfMonCtr1_Lo = 0x114B4A; - constexpr unsigned int mmUMCCH17_PerfMonCtr2_Hi = 0x114B4D; - constexpr unsigned int mmUMCCH17_PerfMonCtr2_Lo = 0x114B4C; - constexpr unsigned int mmUMCCH17_PerfMonCtr3_Hi = 0x114B4F; - constexpr unsigned int mmUMCCH17_PerfMonCtr3_Lo = 0x114B4E; - constexpr unsigned int mmUMCCH17_PerfMonCtr4_Hi = 0x114B51; - constexpr unsigned int mmUMCCH17_PerfMonCtr4_Lo = 0x114B50; - constexpr unsigned int mmUMCCH17_PerfMonCtr5_Hi = 0x114B53; - constexpr unsigned int mmUMCCH17_PerfMonCtr5_Lo = 0x114B52; - constexpr unsigned int mmUMCCH17_PerfMonCtrClk_Hi = 0x114B49; - constexpr unsigned int mmUMCCH17_PerfMonCtrClk_Lo = 0x114B48; - constexpr unsigned int mmUMCCH18_PerfMonCtl1 = 0x115341; - constexpr unsigned int mmUMCCH18_PerfMonCtl2 = 0x115342; - constexpr unsigned int mmUMCCH18_PerfMonCtl3 = 0x115343; - constexpr unsigned int mmUMCCH18_PerfMonCtl4 = 0x115344; - constexpr unsigned int mmUMCCH18_PerfMonCtl5 = 0x115345; - constexpr unsigned int mmUMCCH18_PerfMonCtlClk = 0x115340; - constexpr unsigned int mmUMCCH18_PerfMonCtr1_Hi = 0x11534B; - constexpr unsigned int mmUMCCH18_PerfMonCtr1_Lo = 0x11534A; - constexpr unsigned int mmUMCCH18_PerfMonCtr2_Hi = 0x11534D; - constexpr unsigned int mmUMCCH18_PerfMonCtr2_Lo = 0x11534C; - constexpr unsigned int mmUMCCH18_PerfMonCtr3_Hi = 0x11534F; - constexpr unsigned int mmUMCCH18_PerfMonCtr3_Lo = 0x11534E; - constexpr unsigned int mmUMCCH18_PerfMonCtr4_Hi = 0x115351; - 
constexpr unsigned int mmUMCCH18_PerfMonCtr4_Lo = 0x115350; - constexpr unsigned int mmUMCCH18_PerfMonCtr5_Hi = 0x115353; - constexpr unsigned int mmUMCCH18_PerfMonCtr5_Lo = 0x115352; - constexpr unsigned int mmUMCCH18_PerfMonCtrClk_Hi = 0x115349; - constexpr unsigned int mmUMCCH18_PerfMonCtrClk_Lo = 0x115348; - constexpr unsigned int mmUMCCH19_PerfMonCtl1 = 0x115B41; - constexpr unsigned int mmUMCCH19_PerfMonCtl2 = 0x115B42; - constexpr unsigned int mmUMCCH19_PerfMonCtl3 = 0x115B43; - constexpr unsigned int mmUMCCH19_PerfMonCtl4 = 0x115B44; - constexpr unsigned int mmUMCCH19_PerfMonCtl5 = 0x115B45; - constexpr unsigned int mmUMCCH19_PerfMonCtlClk = 0x115B40; - constexpr unsigned int mmUMCCH19_PerfMonCtr1_Hi = 0x115B4B; - constexpr unsigned int mmUMCCH19_PerfMonCtr1_Lo = 0x115B4A; - constexpr unsigned int mmUMCCH19_PerfMonCtr2_Hi = 0x115B4D; - constexpr unsigned int mmUMCCH19_PerfMonCtr2_Lo = 0x115B4C; - constexpr unsigned int mmUMCCH19_PerfMonCtr3_Hi = 0x115B4F; - constexpr unsigned int mmUMCCH19_PerfMonCtr3_Lo = 0x115B4E; - constexpr unsigned int mmUMCCH19_PerfMonCtr4_Hi = 0x115B51; - constexpr unsigned int mmUMCCH19_PerfMonCtr4_Lo = 0x115B50; - constexpr unsigned int mmUMCCH19_PerfMonCtr5_Hi = 0x115B53; - constexpr unsigned int mmUMCCH19_PerfMonCtr5_Lo = 0x115B52; - constexpr unsigned int mmUMCCH19_PerfMonCtrClk_Hi = 0x115B49; - constexpr unsigned int mmUMCCH19_PerfMonCtrClk_Lo = 0x115B48; - constexpr unsigned int mmUMCCH20_PerfMonCtl1 = 0x154341; - constexpr unsigned int mmUMCCH20_PerfMonCtl2 = 0x154342; - constexpr unsigned int mmUMCCH20_PerfMonCtl3 = 0x154343; - constexpr unsigned int mmUMCCH20_PerfMonCtl4 = 0x154344; - constexpr unsigned int mmUMCCH20_PerfMonCtl5 = 0x154345; - constexpr unsigned int mmUMCCH20_PerfMonCtlClk = 0x154340; - constexpr unsigned int mmUMCCH20_PerfMonCtr1_Hi = 0x15434B; - constexpr unsigned int mmUMCCH20_PerfMonCtr1_Lo = 0x15434A; - constexpr unsigned int mmUMCCH20_PerfMonCtr2_Hi = 0x15434D; - constexpr unsigned int 
mmUMCCH20_PerfMonCtr2_Lo = 0x15434C; - constexpr unsigned int mmUMCCH20_PerfMonCtr3_Hi = 0x15434F; - constexpr unsigned int mmUMCCH20_PerfMonCtr3_Lo = 0x15434E; - constexpr unsigned int mmUMCCH20_PerfMonCtr4_Hi = 0x154351; - constexpr unsigned int mmUMCCH20_PerfMonCtr4_Lo = 0x154350; - constexpr unsigned int mmUMCCH20_PerfMonCtr5_Hi = 0x154353; - constexpr unsigned int mmUMCCH20_PerfMonCtr5_Lo = 0x154352; - constexpr unsigned int mmUMCCH20_PerfMonCtrClk_Hi = 0x154349; - constexpr unsigned int mmUMCCH20_PerfMonCtrClk_Lo = 0x154348; - constexpr unsigned int mmUMCCH21_PerfMonCtl1 = 0x154B41; - constexpr unsigned int mmUMCCH21_PerfMonCtl2 = 0x154B42; - constexpr unsigned int mmUMCCH21_PerfMonCtl3 = 0x154B43; - constexpr unsigned int mmUMCCH21_PerfMonCtl4 = 0x154B44; - constexpr unsigned int mmUMCCH21_PerfMonCtl5 = 0x154B45; - constexpr unsigned int mmUMCCH21_PerfMonCtlClk = 0x154B40; - constexpr unsigned int mmUMCCH21_PerfMonCtr1_Hi = 0x154B4B; - constexpr unsigned int mmUMCCH21_PerfMonCtr1_Lo = 0x154B4A; - constexpr unsigned int mmUMCCH21_PerfMonCtr2_Hi = 0x154B4D; - constexpr unsigned int mmUMCCH21_PerfMonCtr2_Lo = 0x154B4C; - constexpr unsigned int mmUMCCH21_PerfMonCtr3_Hi = 0x154B4F; - constexpr unsigned int mmUMCCH21_PerfMonCtr3_Lo = 0x154B4E; - constexpr unsigned int mmUMCCH21_PerfMonCtr4_Hi = 0x154B51; - constexpr unsigned int mmUMCCH21_PerfMonCtr4_Lo = 0x154B50; - constexpr unsigned int mmUMCCH21_PerfMonCtr5_Hi = 0x154B53; - constexpr unsigned int mmUMCCH21_PerfMonCtr5_Lo = 0x154B52; - constexpr unsigned int mmUMCCH21_PerfMonCtrClk_Hi = 0x154B49; - constexpr unsigned int mmUMCCH21_PerfMonCtrClk_Lo = 0x154B48; - constexpr unsigned int mmUMCCH22_PerfMonCtl1 = 0x155341; - constexpr unsigned int mmUMCCH22_PerfMonCtl2 = 0x155342; - constexpr unsigned int mmUMCCH22_PerfMonCtl3 = 0x155343; - constexpr unsigned int mmUMCCH22_PerfMonCtl4 = 0x155344; - constexpr unsigned int mmUMCCH22_PerfMonCtl5 = 0x155345; - constexpr unsigned int mmUMCCH22_PerfMonCtlClk = 0x155340; - 
constexpr unsigned int mmUMCCH22_PerfMonCtr1_Hi = 0x15534B; - constexpr unsigned int mmUMCCH22_PerfMonCtr1_Lo = 0x15534A; - constexpr unsigned int mmUMCCH22_PerfMonCtr2_Hi = 0x15534D; - constexpr unsigned int mmUMCCH22_PerfMonCtr2_Lo = 0x15534C; - constexpr unsigned int mmUMCCH22_PerfMonCtr3_Hi = 0x15534F; - constexpr unsigned int mmUMCCH22_PerfMonCtr3_Lo = 0x15534E; - constexpr unsigned int mmUMCCH22_PerfMonCtr4_Hi = 0x155351; - constexpr unsigned int mmUMCCH22_PerfMonCtr4_Lo = 0x155350; - constexpr unsigned int mmUMCCH22_PerfMonCtr5_Hi = 0x155353; - constexpr unsigned int mmUMCCH22_PerfMonCtr5_Lo = 0x155352; - constexpr unsigned int mmUMCCH22_PerfMonCtrClk_Hi = 0x155349; - constexpr unsigned int mmUMCCH22_PerfMonCtrClk_Lo = 0x155348; - constexpr unsigned int mmUMCCH23_PerfMonCtl1 = 0x155B41; - constexpr unsigned int mmUMCCH23_PerfMonCtl2 = 0x155B42; - constexpr unsigned int mmUMCCH23_PerfMonCtl3 = 0x155B43; - constexpr unsigned int mmUMCCH23_PerfMonCtl4 = 0x155B44; - constexpr unsigned int mmUMCCH23_PerfMonCtl5 = 0x155B45; - constexpr unsigned int mmUMCCH23_PerfMonCtlClk = 0x155B40; - constexpr unsigned int mmUMCCH23_PerfMonCtr1_Hi = 0x155B4B; - constexpr unsigned int mmUMCCH23_PerfMonCtr1_Lo = 0x155B4A; - constexpr unsigned int mmUMCCH23_PerfMonCtr2_Hi = 0x155B4D; - constexpr unsigned int mmUMCCH23_PerfMonCtr2_Lo = 0x155B4C; - constexpr unsigned int mmUMCCH23_PerfMonCtr3_Hi = 0x155B4F; - constexpr unsigned int mmUMCCH23_PerfMonCtr3_Lo = 0x155B4E; - constexpr unsigned int mmUMCCH23_PerfMonCtr4_Hi = 0x155B51; - constexpr unsigned int mmUMCCH23_PerfMonCtr4_Lo = 0x155B50; - constexpr unsigned int mmUMCCH23_PerfMonCtr5_Hi = 0x155B53; - constexpr unsigned int mmUMCCH23_PerfMonCtr5_Lo = 0x155B52; - constexpr unsigned int mmUMCCH23_PerfMonCtrClk_Hi = 0x155B49; - constexpr unsigned int mmUMCCH23_PerfMonCtrClk_Lo = 0x155B48; - constexpr unsigned int mmUMCCH24_PerfMonCtl1 = 0x194341; - constexpr unsigned int mmUMCCH24_PerfMonCtl2 = 0x194342; - constexpr unsigned int 
mmUMCCH24_PerfMonCtl3 = 0x194343; - constexpr unsigned int mmUMCCH24_PerfMonCtl4 = 0x194344; - constexpr unsigned int mmUMCCH24_PerfMonCtl5 = 0x194345; - constexpr unsigned int mmUMCCH24_PerfMonCtlClk = 0x194340; - constexpr unsigned int mmUMCCH24_PerfMonCtr1_Hi = 0x19434B; - constexpr unsigned int mmUMCCH24_PerfMonCtr1_Lo = 0x19434A; - constexpr unsigned int mmUMCCH24_PerfMonCtr2_Hi = 0x19434D; - constexpr unsigned int mmUMCCH24_PerfMonCtr2_Lo = 0x19434C; - constexpr unsigned int mmUMCCH24_PerfMonCtr3_Hi = 0x19434F; - constexpr unsigned int mmUMCCH24_PerfMonCtr3_Lo = 0x19434E; - constexpr unsigned int mmUMCCH24_PerfMonCtr4_Hi = 0x194351; - constexpr unsigned int mmUMCCH24_PerfMonCtr4_Lo = 0x194350; - constexpr unsigned int mmUMCCH24_PerfMonCtr5_Hi = 0x194353; - constexpr unsigned int mmUMCCH24_PerfMonCtr5_Lo = 0x194352; - constexpr unsigned int mmUMCCH24_PerfMonCtrClk_Hi = 0x194349; - constexpr unsigned int mmUMCCH24_PerfMonCtrClk_Lo = 0x194348; - constexpr unsigned int mmUMCCH25_PerfMonCtl1 = 0x194B41; - constexpr unsigned int mmUMCCH25_PerfMonCtl2 = 0x194B42; - constexpr unsigned int mmUMCCH25_PerfMonCtl3 = 0x194B43; - constexpr unsigned int mmUMCCH25_PerfMonCtl4 = 0x194B44; - constexpr unsigned int mmUMCCH25_PerfMonCtl5 = 0x194B45; - constexpr unsigned int mmUMCCH25_PerfMonCtlClk = 0x194B40; - constexpr unsigned int mmUMCCH25_PerfMonCtr1_Hi = 0x194B4B; - constexpr unsigned int mmUMCCH25_PerfMonCtr1_Lo = 0x194B4A; - constexpr unsigned int mmUMCCH25_PerfMonCtr2_Hi = 0x194B4D; - constexpr unsigned int mmUMCCH25_PerfMonCtr2_Lo = 0x194B4C; - constexpr unsigned int mmUMCCH25_PerfMonCtr3_Hi = 0x194B4F; - constexpr unsigned int mmUMCCH25_PerfMonCtr3_Lo = 0x194B4E; - constexpr unsigned int mmUMCCH25_PerfMonCtr4_Hi = 0x194B51; - constexpr unsigned int mmUMCCH25_PerfMonCtr4_Lo = 0x194B50; - constexpr unsigned int mmUMCCH25_PerfMonCtr5_Hi = 0x194B53; - constexpr unsigned int mmUMCCH25_PerfMonCtr5_Lo = 0x194B52; - constexpr unsigned int mmUMCCH25_PerfMonCtrClk_Hi = 
0x194B49; - constexpr unsigned int mmUMCCH25_PerfMonCtrClk_Lo = 0x194B48; - constexpr unsigned int mmUMCCH26_PerfMonCtl1 = 0x195341; - constexpr unsigned int mmUMCCH26_PerfMonCtl2 = 0x195342; - constexpr unsigned int mmUMCCH26_PerfMonCtl3 = 0x195343; - constexpr unsigned int mmUMCCH26_PerfMonCtl4 = 0x195344; - constexpr unsigned int mmUMCCH26_PerfMonCtl5 = 0x195345; - constexpr unsigned int mmUMCCH26_PerfMonCtlClk = 0x195340; - constexpr unsigned int mmUMCCH26_PerfMonCtr1_Hi = 0x19534B; - constexpr unsigned int mmUMCCH26_PerfMonCtr1_Lo = 0x19534A; - constexpr unsigned int mmUMCCH26_PerfMonCtr2_Hi = 0x19534D; - constexpr unsigned int mmUMCCH26_PerfMonCtr2_Lo = 0x19534C; - constexpr unsigned int mmUMCCH26_PerfMonCtr3_Hi = 0x19534F; - constexpr unsigned int mmUMCCH26_PerfMonCtr3_Lo = 0x19534E; - constexpr unsigned int mmUMCCH26_PerfMonCtr4_Hi = 0x195351; - constexpr unsigned int mmUMCCH26_PerfMonCtr4_Lo = 0x195350; - constexpr unsigned int mmUMCCH26_PerfMonCtr5_Hi = 0x195353; - constexpr unsigned int mmUMCCH26_PerfMonCtr5_Lo = 0x195352; - constexpr unsigned int mmUMCCH26_PerfMonCtrClk_Hi = 0x195349; - constexpr unsigned int mmUMCCH26_PerfMonCtrClk_Lo = 0x195348; - constexpr unsigned int mmUMCCH27_PerfMonCtl1 = 0x195B41; - constexpr unsigned int mmUMCCH27_PerfMonCtl2 = 0x195B42; - constexpr unsigned int mmUMCCH27_PerfMonCtl3 = 0x195B43; - constexpr unsigned int mmUMCCH27_PerfMonCtl4 = 0x195B44; - constexpr unsigned int mmUMCCH27_PerfMonCtl5 = 0x195B45; - constexpr unsigned int mmUMCCH27_PerfMonCtlClk = 0x195B40; - constexpr unsigned int mmUMCCH27_PerfMonCtr1_Hi = 0x195B4B; - constexpr unsigned int mmUMCCH27_PerfMonCtr1_Lo = 0x195B4A; - constexpr unsigned int mmUMCCH27_PerfMonCtr2_Hi = 0x195B4D; - constexpr unsigned int mmUMCCH27_PerfMonCtr2_Lo = 0x195B4C; - constexpr unsigned int mmUMCCH27_PerfMonCtr3_Hi = 0x195B4F; - constexpr unsigned int mmUMCCH27_PerfMonCtr3_Lo = 0x195B4E; - constexpr unsigned int mmUMCCH27_PerfMonCtr4_Hi = 0x195B51; - constexpr unsigned int 
mmUMCCH27_PerfMonCtr4_Lo = 0x195B50; - constexpr unsigned int mmUMCCH27_PerfMonCtr5_Hi = 0x195B53; - constexpr unsigned int mmUMCCH27_PerfMonCtr5_Lo = 0x195B52; - constexpr unsigned int mmUMCCH27_PerfMonCtrClk_Hi = 0x195B49; - constexpr unsigned int mmUMCCH27_PerfMonCtrClk_Lo = 0x195B48; - constexpr unsigned int mmUMCCH28_PerfMonCtl1 = 0x1D4341; - constexpr unsigned int mmUMCCH28_PerfMonCtl2 = 0x1D4342; - constexpr unsigned int mmUMCCH28_PerfMonCtl3 = 0x1D4343; - constexpr unsigned int mmUMCCH28_PerfMonCtl4 = 0x1D4344; - constexpr unsigned int mmUMCCH28_PerfMonCtl5 = 0x1D4345; - constexpr unsigned int mmUMCCH28_PerfMonCtlClk = 0x1D4340; - constexpr unsigned int mmUMCCH28_PerfMonCtr1_Hi = 0x1D434B; - constexpr unsigned int mmUMCCH28_PerfMonCtr1_Lo = 0x1D434A; - constexpr unsigned int mmUMCCH28_PerfMonCtr2_Hi = 0x1D434D; - constexpr unsigned int mmUMCCH28_PerfMonCtr2_Lo = 0x1D434C; - constexpr unsigned int mmUMCCH28_PerfMonCtr3_Hi = 0x1D434F; - constexpr unsigned int mmUMCCH28_PerfMonCtr3_Lo = 0x1D434E; - constexpr unsigned int mmUMCCH28_PerfMonCtr4_Hi = 0x1D4351; - constexpr unsigned int mmUMCCH28_PerfMonCtr4_Lo = 0x1D4350; - constexpr unsigned int mmUMCCH28_PerfMonCtr5_Hi = 0x1D4353; - constexpr unsigned int mmUMCCH28_PerfMonCtr5_Lo = 0x1D4352; - constexpr unsigned int mmUMCCH28_PerfMonCtrClk_Hi = 0x1D4349; - constexpr unsigned int mmUMCCH28_PerfMonCtrClk_Lo = 0x1D4348; - constexpr unsigned int mmUMCCH29_PerfMonCtl1 = 0x1D4B41; - constexpr unsigned int mmUMCCH29_PerfMonCtl2 = 0x1D4B42; - constexpr unsigned int mmUMCCH29_PerfMonCtl3 = 0x1D4B43; - constexpr unsigned int mmUMCCH29_PerfMonCtl4 = 0x1D4B44; - constexpr unsigned int mmUMCCH29_PerfMonCtl5 = 0x1D4B45; - constexpr unsigned int mmUMCCH29_PerfMonCtlClk = 0x1D4B40; - constexpr unsigned int mmUMCCH29_PerfMonCtr1_Hi = 0x1D4B4B; - constexpr unsigned int mmUMCCH29_PerfMonCtr1_Lo = 0x1D4B4A; - constexpr unsigned int mmUMCCH29_PerfMonCtr2_Hi = 0x1D4B4D; - constexpr unsigned int mmUMCCH29_PerfMonCtr2_Lo = 0x1D4B4C; - 
constexpr unsigned int mmUMCCH29_PerfMonCtr3_Hi = 0x1D4B4F; - constexpr unsigned int mmUMCCH29_PerfMonCtr3_Lo = 0x1D4B4E; - constexpr unsigned int mmUMCCH29_PerfMonCtr4_Hi = 0x1D4B51; - constexpr unsigned int mmUMCCH29_PerfMonCtr4_Lo = 0x1D4B50; - constexpr unsigned int mmUMCCH29_PerfMonCtr5_Hi = 0x1D4B53; - constexpr unsigned int mmUMCCH29_PerfMonCtr5_Lo = 0x1D4B52; - constexpr unsigned int mmUMCCH29_PerfMonCtrClk_Hi = 0x1D4B49; - constexpr unsigned int mmUMCCH29_PerfMonCtrClk_Lo = 0x1D4B48; - constexpr unsigned int mmUMCCH30_PerfMonCtl1 = 0x1D5341; - constexpr unsigned int mmUMCCH30_PerfMonCtl2 = 0x1D5342; - constexpr unsigned int mmUMCCH30_PerfMonCtl3 = 0x1D5343; - constexpr unsigned int mmUMCCH30_PerfMonCtl4 = 0x1D5344; - constexpr unsigned int mmUMCCH30_PerfMonCtl5 = 0x1D5345; - constexpr unsigned int mmUMCCH30_PerfMonCtlClk = 0x1D5340; - constexpr unsigned int mmUMCCH30_PerfMonCtr1_Hi = 0x1D534B; - constexpr unsigned int mmUMCCH30_PerfMonCtr1_Lo = 0x1D534A; - constexpr unsigned int mmUMCCH30_PerfMonCtr2_Hi = 0x1D534D; - constexpr unsigned int mmUMCCH30_PerfMonCtr2_Lo = 0x1D534C; - constexpr unsigned int mmUMCCH30_PerfMonCtr3_Hi = 0x1D534F; - constexpr unsigned int mmUMCCH30_PerfMonCtr3_Lo = 0x1D534E; - constexpr unsigned int mmUMCCH30_PerfMonCtr4_Hi = 0x1D5351; - constexpr unsigned int mmUMCCH30_PerfMonCtr4_Lo = 0x1D5350; - constexpr unsigned int mmUMCCH30_PerfMonCtr5_Hi = 0x1D5353; - constexpr unsigned int mmUMCCH30_PerfMonCtr5_Lo = 0x1D5352; - constexpr unsigned int mmUMCCH30_PerfMonCtrClk_Hi = 0x1D5349; - constexpr unsigned int mmUMCCH30_PerfMonCtrClk_Lo = 0x1D5348; - constexpr unsigned int mmUMCCH31_PerfMonCtl1 = 0x1D5B41; - constexpr unsigned int mmUMCCH31_PerfMonCtl2 = 0x1D5B42; - constexpr unsigned int mmUMCCH31_PerfMonCtl3 = 0x1D5B43; - constexpr unsigned int mmUMCCH31_PerfMonCtl4 = 0x1D5B44; - constexpr unsigned int mmUMCCH31_PerfMonCtl5 = 0x1D5B45; - constexpr unsigned int mmUMCCH31_PerfMonCtlClk = 0x1D5B40; - constexpr unsigned int 
mmUMCCH31_PerfMonCtr1_Hi = 0x1D5B4B; - constexpr unsigned int mmUMCCH31_PerfMonCtr1_Lo = 0x1D5B4A; - constexpr unsigned int mmUMCCH31_PerfMonCtr2_Hi = 0x1D5B4D; - constexpr unsigned int mmUMCCH31_PerfMonCtr2_Lo = 0x1D5B4C; - constexpr unsigned int mmUMCCH31_PerfMonCtr3_Hi = 0x1D5B4F; - constexpr unsigned int mmUMCCH31_PerfMonCtr3_Lo = 0x1D5B4E; - constexpr unsigned int mmUMCCH31_PerfMonCtr4_Hi = 0x1D5B51; - constexpr unsigned int mmUMCCH31_PerfMonCtr4_Lo = 0x1D5B50; - constexpr unsigned int mmUMCCH31_PerfMonCtr5_Hi = 0x1D5B53; - constexpr unsigned int mmUMCCH31_PerfMonCtr5_Lo = 0x1D5B52; - constexpr unsigned int mmUMCCH31_PerfMonCtrClk_Hi = 0x1D5B49; - constexpr unsigned int mmUMCCH31_PerfMonCtrClk_Lo = 0x1D5B48; -} // namespace Vg20 - -namespace Vg20_Rv1x_Rv2x -{ - constexpr unsigned int mmMP1_SMN_FPS_CNT = 0x162C5; -} // namespace Vg20_Rv1x_Rv2x - -} // inline namespace Chip -} // namespace Gfx9 -} // namespace Pal diff --git a/lgc/imported/chip/gfx9/gfx9_plus_merged_registers.h b/lgc/imported/chip/gfx9/gfx9_plus_merged_registers.h deleted file mode 100644 index 2b224cf1bc..0000000000 --- a/lgc/imported/chip/gfx9/gfx9_plus_merged_registers.h +++ /dev/null @@ -1,48900 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -#pragma once - -// -// Make sure the necessary endian defines are there. 
-// -#ifndef LITTLEENDIAN_CPU -#error "LITTLEENDIAN_CPU must be defined" -#endif - -namespace Pal -{ -namespace Gfx9 -{ -inline namespace Chip -{ -union ATC_L2_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_L2_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_L2_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_L2_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_L2_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned 
int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER2_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER3_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union ATC_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND0_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND1_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned 
int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND2_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND3_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND4_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND5_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - 
unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND6_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND7_CONTROL { - struct { - unsigned int COLOR_SRCBLEND : 5; - unsigned int COLOR_COMB_FCN : 3; - unsigned int COLOR_DESTBLEND : 5; - unsigned int : 3; - unsigned int ALPHA_SRCBLEND : 5; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int ALPHA_DESTBLEND : 5; - unsigned int SEPARATE_ALPHA_BLEND : 1; - unsigned int ENABLE : 1; - unsigned int DISABLE_ROP3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND_ALPHA { - struct { - unsigned int BLEND_ALPHA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND_BLUE { - struct { - unsigned int BLEND_BLUE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND_GREEN { - struct { - unsigned int BLEND_GREEN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_BLEND_RED { - struct { - unsigned int BLEND_RED : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_CACHE_EVICT_POINTS { - struct { - unsigned int : 16; - unsigned int DCC_CACHE_EVICT_POINT : 8; - unsigned int CC_CACHE_EVICT_POINT : 8; - } bits, bitfields; - struct { - unsigned int CM_CACHE_EVICT_POINT : 8; - unsigned int 
FC_CACHE_EVICT_POINT : 8; - unsigned int : 16; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int CC_COLOR_EVICT_POINT : 8; - unsigned int CC_FMASK_EVICT_POINT : 8; - unsigned int : 16; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_CGTT_SCLK_CTRL { - struct { - unsigned int ON_DELAY : 4; - unsigned int OFF_HYSTERESIS : 8; - unsigned int : 4; - unsigned int SOFT_STALL_OVERRIDE7 : 1; - unsigned int SOFT_STALL_OVERRIDE6 : 1; - unsigned int SOFT_STALL_OVERRIDE5 : 1; - unsigned int SOFT_STALL_OVERRIDE4 : 1; - unsigned int SOFT_STALL_OVERRIDE3 : 1; - unsigned int SOFT_STALL_OVERRIDE2 : 1; - unsigned int SOFT_STALL_OVERRIDE1 : 1; - unsigned int SOFT_STALL_OVERRIDE0 : 1; - unsigned int SOFT_OVERRIDE7 : 1; - unsigned int SOFT_OVERRIDE6 : 1; - unsigned int SOFT_OVERRIDE5 : 1; - unsigned int SOFT_OVERRIDE4 : 1; - unsigned int SOFT_OVERRIDE3 : 1; - unsigned int SOFT_OVERRIDE2 : 1; - unsigned int SOFT_OVERRIDE1 : 1; - unsigned int SOFT_OVERRIDE0 : 1; - } core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_CGTT_SCLK_CTRL1 { - struct { - unsigned int ON_DELAY : 4; - unsigned int OFF_HYSTERESIS : 8; - unsigned int : 4; - unsigned int SOFT_STALL_OVERRIDE7 : 1; - unsigned int SOFT_STALL_OVERRIDE6 : 1; - unsigned int SOFT_STALL_OVERRIDE5 : 1; - unsigned int SOFT_STALL_OVERRIDE4 : 1; - unsigned int SOFT_STALL_OVERRIDE3 : 1; - unsigned int SOFT_STALL_OVERRIDE2 : 1; - unsigned int SOFT_STALL_OVERRIDE1 : 1; - unsigned int SOFT_STALL_OVERRIDE0 : 1; - unsigned int SOFT_OVERRIDE7 : 1; - unsigned int SOFT_OVERRIDE6 : 1; - unsigned int SOFT_OVERRIDE5 : 1; - unsigned int SOFT_OVERRIDE4 : 1; - unsigned int SOFT_OVERRIDE3 : 1; - unsigned int SOFT_OVERRIDE2 : 1; - unsigned int SOFT_OVERRIDE1 : 1; - unsigned int SOFT_OVERRIDE0 : 1; - } gfx103; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_ATTRIB { - struct { - 
unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - 
unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - 
unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR0_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 
14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct 
{ - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 
1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR1_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - 
unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned 
int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 
12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned 
int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR2_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; 
- struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_CLEAR_WORD0 { - struct { - 
unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - 
unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - 
unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR3_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - 
unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
CB_COLOR4_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 
21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - 
unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR4_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } 
gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - 
float f32All; -}; - -union CB_COLOR5_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| 
CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } 
gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR5_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - 
unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union CB_COLOR6_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE 
: 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| 
CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR6_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_ATTRIB { - struct { - unsigned int : 12; - unsigned int NUM_SAMPLES : 3; - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int : 14; - } most; - struct { - unsigned int MIP0_DEPTH : 11; - unsigned int META_LINEAR : 1; - unsigned int : 6; - unsigned int COLOR_SW_MODE : 5; - unsigned int FMASK_SW_MODE : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int RB_ALIGNED : 1; - unsigned int PIPE_ALIGNED : 1; - } gfx09; - struct { - unsigned int TILE_MODE_INDEX : 5; - unsigned int FMASK_TILE_MODE_INDEX : 5; - unsigned int FMASK_BANK_HEIGHT : 2; - unsigned int : 6; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - unsigned int : 12; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int NUM_FRAGMENTS : 2; - unsigned int FORCE_DST_ALPHA_1 : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int LIMIT_COLOR_FETCH_TO_256B_MAX : 1; - 
unsigned int FORCE_LIMIT_COLOR_SECTOR_TO_256B_MAX : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_ATTRIB2 { - struct { - unsigned int MIP0_HEIGHT : 14; - unsigned int MIP0_WIDTH : 14; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_ATTRIB3 { - struct { - unsigned int MIP0_DEPTH : 13; - unsigned int META_LINEAR : 1; - unsigned int COLOR_SW_MODE : 5; - unsigned int : 5; - unsigned int RESOURCE_TYPE : 2; - unsigned int : 4; - unsigned int DCC_PIPE_ALIGNED : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int FMASK_SW_MODE : 5; - unsigned int : 2; - unsigned int CMASK_PIPE_ALIGNED : 1; - unsigned int : 5; - } gfx10; - struct { - unsigned int : 27; - unsigned int RESOURCE_LEVEL : 3; - unsigned int : 2; - } gfx10Core; - struct { - unsigned int : 31; - unsigned int VRS_RATE_HINT_ENABLE : 1; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_CLEAR_WORD0 { - struct { - unsigned int CLEAR_WORD0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_CLEAR_WORD1 { - struct { - unsigned int CLEAR_WORD1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_CMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_CMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - 
} bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_CMASK_SLICE { - struct { - unsigned int TILE_MAX : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_DCC_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_DCC_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_DCC_CONTROL { - struct { - unsigned int : 2; - unsigned int MAX_UNCOMPRESSED_BLOCK_SIZE : 2; - unsigned int MIN_COMPRESSED_BLOCK_SIZE : 1; - unsigned int MAX_COMPRESSED_BLOCK_SIZE : 2; - unsigned int COLOR_TRANSFORM : 2; - unsigned int INDEPENDENT_64B_BLOCKS : 1; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int KEY_CLEAR_ENABLE : 1; - unsigned int : 8; - unsigned int LOSSY_RGB_PRECISION : 4; - unsigned int LOSSY_ALPHA_PRECISION : 4; - unsigned int : 14; - } gfx09_10; - struct { - unsigned int : 18; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int ENABLE_CONSTANT_ENCODE_REG_WRITE : 1; - unsigned int : 12; - } gfx09_1xPlus; - struct { - unsigned int : 20; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - } gfx10; - struct { - unsigned int : 22; - unsigned int DCC_COMPRESS_DISABLE : 1; - unsigned int : 9; - } gfx103Derivative; - struct { - unsigned int : 21; - unsigned int SKIP_LOW_COMP_RATIO : 1; - unsigned int : 10; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SAMPLE_MASK_TRACKER_FEA_FORCE : 1; - unsigned int : 8; - unsigned int INDEPENDENT_128B_BLOCKS : 1; - unsigned int : 11; - unsigned int FDCC_ENABLE : 1; - unsigned int 
DCC_COMPRESS_DISABLE : 1; - unsigned int FRAGMENT_COMPRESS_DISABLE : 1; - unsigned int : 7; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_FMASK { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_FMASK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_FMASK_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_INFO { - struct { - unsigned int : 8; - unsigned int NUMBER_TYPE : 3; - unsigned int COMP_SWAP : 2; - unsigned int : 2; - unsigned int BLEND_CLAMP : 1; - unsigned int BLEND_BYPASS : 1; - unsigned int SIMPLE_FLOAT : 1; - unsigned int ROUND_MODE : 1; - unsigned int : 1; - unsigned int BLEND_OPT_DONT_RD_DST : 3; - unsigned int BLEND_OPT_DISCARD_PIXEL : 3; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int ALT_TILE_MODE : 1; - } most; - struct { - unsigned int ENDIAN : 2; - unsigned int FORMAT : 5; - unsigned int : 6; - unsigned int FAST_CLEAR : 1; - unsigned int COMPRESSION : 1; - unsigned int : 11; - unsigned int FMASK_COMPRESSION_DISABLE : 1; - unsigned int FMASK_COMPRESS_1FRAG_ONLY : 1; - unsigned int DCC_ENABLE : 1; - unsigned int CMASK_ADDR_TYPE : 2; - unsigned int : 1; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int NBC_TILING : 1; - } gfx103; - struct { - unsigned int : 19; - unsigned int CMASK_IS_LINEAR : 1; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int : 7; - unsigned int LINEAR_GENERAL : 1; - unsigned int : 24; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int FORMAT : 5; - unsigned int : 27; - } gfx11; -#endif - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_PITCH { - struct { - unsigned int TILE_MAX : 11; - unsigned int : 9; - unsigned int FMASK_TILE_MAX : 11; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_SLICE { - struct { - unsigned int TILE_MAX : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR7_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int MIP_LEVEL : 4; - unsigned int : 4; - } gfx09; - struct { - unsigned int SLICE_START : 13; - unsigned int SLICE_MAX : 13; - unsigned int MIP_LEVEL : 4; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COLOR_CONTROL { - struct { - unsigned int DISABLE_DUAL_QUAD : 1; - unsigned int : 2; - unsigned int DEGAMMA_ENABLE : 1; - unsigned int MODE : 3; - unsigned int : 9; - unsigned int ROP3 : 8; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int : 1; - unsigned int : 30; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_COVERAGE_OUT_CONTROL { - struct { - unsigned int COVERAGE_OUT_ENABLE : 1; - unsigned int COVERAGE_OUT_MRT : 3; - unsigned int COVERAGE_OUT_CHANNEL : 2; - unsigned int : 2; - unsigned int COVERAGE_OUT_SAMPLES : 4; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_DCC_CONFIG { - struct { - unsigned int : 26; - unsigned int DCC_CACHE_NUM_TAGS : 6; - } most; - struct { - unsigned int : 16; - unsigned int READ_RETURN_SKID_FIFO_DEPTH : 7; - unsigned int : 1; - unsigned int DCC_CACHE_EVICT_POINT : 4; - unsigned int DCC_CACHE_NUM_TAGS : 4; - } gfx09; - struct { - unsigned int OVERWRITE_COMBINER_DEPTH : 5; - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - 
unsigned int OVERWRITE_COMBINER_CC_POP_DISABLE : 1; - unsigned int : 1; - unsigned int FC_RDLAT_KEYID_FIFO_DEPTH : 8; - unsigned int : 16; - } gfx09_10; - struct { - unsigned int : 7; - unsigned int DISABLE_CONSTANT_ENCODE : 1; - unsigned int : 24; - } gfx09_1xPlus; - struct { - unsigned int : 25; - unsigned int DCC_CACHE_NUM_TAGS : 7; - } gfx103PlusExclusive; - struct { - unsigned int : 16; - unsigned int READ_RETURN_SKID_FIFO_DEPTH : 9; - unsigned int : 7; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SAMPLE_MASK_TRACKER_DEPTH : 5; - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int SPARE_13 : 1; - unsigned int : 1; - unsigned int SPARE_14 : 8; - unsigned int : 16; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_DCC_CONFIG2 { - struct { - unsigned int INVALID_KEY_ERROR_CODE : 8; - unsigned int CLEAR_FRAG2DCC_KEY_ERROR_CODE : 1; - unsigned int ENABLE_COMP_KEY_ERROR_DETECTION : 1; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union CB_DCC_CONTROL { - struct { - unsigned int OVERWRITE_COMBINER_DISABLE : 1; - unsigned int : 1; - unsigned int OVERWRITE_COMBINER_WATERMARK : 5; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int DISABLE_CONSTANT_ENCODE_AC01 : 1; - unsigned int DISABLE_CONSTANT_ENCODE_SINGLE : 1; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int : 1; - unsigned int DISABLE_ELIMFC_SKIP_OF_AC01 : 1; - unsigned int DISABLE_ELIMFC_SKIP_OF_SINGLE : 1; - unsigned int ENABLE_ELIMFC_SKIP_OF_REG : 1; - unsigned int : 17; - } most; - struct { - unsigned int : 1; - unsigned int OVERWRITE_COMBINER_MRT_SHARING_DISABLE : 1; - unsigned int : 30; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_FDCC_CONTROL { - struct { - unsigned int SAMPLE_MASK_TRACKER_DISABLE : 1; - unsigned int : 1; - unsigned int SAMPLE_MASK_TRACKER_WATERMARK : 5; - unsigned int : 1; - unsigned int DISABLE_CONSTANT_ENCODE_AC01 : 1; - unsigned int DISABLE_CONSTANT_ENCODE_SINGLE : 1; - unsigned int DISABLE_CONSTANT_ENCODE_REG : 1; - unsigned int : 1; - unsigned int DISABLE_ELIMFC_SKIP_OF_AC01 : 1; - unsigned int DISABLE_ELIMFC_SKIP_OF_SINGLE : 1; - unsigned int ENABLE_ELIMFC_SKIP_OF_REG : 1; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_FGCG_SRAM_OVERRIDE { - struct { - unsigned int DISABLE_FGCG : 20; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union CB_HW_CONTROL { - struct { - unsigned int : 19; - unsigned int FORCE_NEEDS_DST : 1; - unsigned int : 1; - unsigned int DISABLE_BLEND_OPT_RESULT_EQ_DEST : 1; - unsigned int : 2; - unsigned int DISABLE_BLEND_OPT_DONT_RD_DST : 1; - unsigned int DISABLE_BLEND_OPT_BYPASS : 1; - unsigned int DISABLE_BLEND_OPT_DISCARD_PIXEL : 1; - unsigned int DISABLE_BLEND_OPT_WHEN_DISABLED_SRCALPHA_IS_USED : 1; - unsigned int : 2; - unsigned int DISABLE_CC_IB_SERIALIZER_STATE_OPT : 1; - unsigned int DISABLE_PIXEL_IN_QUAD_FIX_FOR_LINEAR_SURFACE : 1; - } bits, bitfields; - struct { - unsigned int : 18; - unsigned int DISABLE_INTNORM_LE11BPC_CLAMPING : 1; - unsigned int : 13; - } most; - struct { - unsigned int CM_CACHE_EVICT_POINT : 4; - unsigned int : 2; - unsigned int FC_CACHE_EVICT_POINT : 4; - unsigned int : 2; - unsigned int CC_CACHE_EVICT_POINT : 4; - unsigned int ALLOW_MRT_WITH_DUAL_SOURCE : 1; - unsigned int : 15; - } gfx09; - struct { - unsigned int : 20; - unsigned int FORCE_ALWAYS_TOGGLE : 1; - unsigned int : 1; - unsigned int 
DISABLE_FULL_WRITE_MASK : 1; - unsigned int DISABLE_RESOLVE_OPT_FOR_SINGLE_FRAG : 1; - unsigned int : 4; - unsigned int PRIORITIZE_FC_WR_OVER_FC_RD_ON_CMASK_CONFLICT : 1; - unsigned int PRIORITIZE_FC_EVICT_OVER_FOP_RD_ON_BANK_CONFLICT : 1; - unsigned int : 2; - } gfx09_10; - struct { - unsigned int : 15; - unsigned int DISABLE_FMASK_MULTI_MGCG_DOMAINS : 1; - unsigned int : 16; - } gfx103; - struct { - unsigned int : 3; - unsigned int DISABLE_FILLRATE_OPT_FIX_WITH_CFC : 1; - unsigned int DISABLE_POST_DCC_WITH_CFC_FIX : 1; - unsigned int DISABLE_COMPRESS_1FRAG_WHEN_VRS_RATE_HINT_EN : 1; - unsigned int : 6; - unsigned int CHICKEN_BITS : 3; - unsigned int : 1; - unsigned int DISABLE_CMASK_CACHE_BYTEMASKING : 1; - unsigned int : 15; - } gfx103Derivative; - struct { - unsigned int : 1; - unsigned int DISABLE_VRS_FILLRATE_OPTIMIZATION : 1; - unsigned int : 4; - unsigned int RMI_CREDITS : 6; - unsigned int : 5; - unsigned int DISABLE_DCC_CACHE_BYTEMASKING : 1; - unsigned int : 14; - } gfx103PlusExclusive; - struct { - unsigned int ALLOW_MRT_WITH_DUAL_SOURCE : 1; - unsigned int : 31; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 2; - unsigned int DISABLE_SMT_WHEN_NO_FDCC_FIX : 1; - unsigned int : 9; - unsigned int NUM_CCC_SKID_FIFO_ENTRIES : 3; - unsigned int FORCE_FEA_HIGH : 1; - unsigned int FORCE_EVICT_ALL_VALID : 1; - unsigned int : 3; - unsigned int DISABLE_USE_OF_SET_HASH : 1; - unsigned int : 1; - unsigned int SPARE_2 : 1; - unsigned int : 6; - unsigned int SPARE_3 : 1; - unsigned int : 2; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_CONTROL_1 { - struct { - unsigned int : 5; - unsigned int FC_CACHE_NUM_TAGS : 6; - unsigned int : 21; - } most; - struct { - unsigned int : 11; - unsigned int CC_CACHE_NUM_TAGS : 6; - unsigned int CM_TILE_FIFO_DEPTH : 9; - unsigned int RMI_CREDITS : 6; - } gfx09; - struct { - unsigned int CM_CACHE_NUM_TAGS : 5; 
- unsigned int : 27; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int CC_CACHE_NUM_TAGS : 6; - unsigned int CM_TILE_FIFO_DEPTH : 9; - unsigned int RMI_CREDITS : 6; - } gfx101; - struct { - unsigned int : 12; - unsigned int CC_CACHE_NUM_TAGS : 6; - unsigned int CM_TILE_FIFO_DEPTH : 9; - unsigned int : 5; - } gfx103Derivative; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int CC_CACHE_NUM_TAGS : 6; - unsigned int : 26; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 5; - unsigned int FC_CACHE_NUM_TAGS : 7; - unsigned int : 20; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 5; - unsigned int FC_CACHE_NUM_TAGS : 7; - unsigned int : 20; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 5; - unsigned int FC_CACHE_NUM_TAGS : 7; - unsigned int : 20; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 5; - unsigned int FC_CACHE_NUM_TAGS : 7; - unsigned int : 20; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_CONTROL_2 { - struct { - unsigned int CC_EVEN_ODD_FIFO_DEPTH : 8; - unsigned int : 24; - } most; - struct { - unsigned int : 24; - unsigned int DRR_ASSUMED_FIFO_DEPTH_DIV8 : 4; - unsigned int CHICKEN_BITS : 4; - } gfx09; - struct { - unsigned int : 8; - unsigned int FC_RDLAT_TILE_FIFO_DEPTH : 7; - unsigned int FC_RDLAT_QUAD_FIFO_DEPTH : 8; - unsigned int : 9; - } gfx09_10; - struct { - unsigned int : 24; - unsigned int DRR_ASSUMED_FIFO_DEPTH_DIV8 : 6; - unsigned int CHICKEN_BITS : 2; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SPARE_4 : 8; - unsigned int DRR_ASSUMED_FIFO_DEPTH_DIV8 : 6; - unsigned int SPARE : 18; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_CONTROL_3 { - struct { - unsigned int : 1; - unsigned int RAM_ADDRESS_CONFLICTS_DISALLOWED : 1; - unsigned 
int : 30; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int DISABLE_BLENDER_CLOCK_GATING : 1; - unsigned int : 5; - } most; - struct { - unsigned int : 6; - unsigned int DISABLE_CC_CACHE_OVWR_KEY_MOD : 1; - unsigned int : 1; - unsigned int DISABLE_OVERWRITE_COMBINER_TARGET_MASK_VALIDATION : 1; - unsigned int : 18; - unsigned int : 1; - unsigned int COLOR_CACHE_PREFETCH_NUM_CLS : 2; - unsigned int : 2; - } gfx09; - struct { - unsigned int DISABLE_SLOW_MODE_EMPTY_HALF_QUAD_KILL : 1; - unsigned int : 1; - unsigned int DISABLE_FAST_CLEAR_FETCH_OPT : 1; - unsigned int DISABLE_QUAD_MARKER_DROP_STOP : 1; - unsigned int DISABLE_OVERWRITE_COMBINER_CAM_CLR : 1; - unsigned int DISABLE_CC_CACHE_OVWR_STATUS_ACCUM : 1; - unsigned int : 1; - unsigned int DISABLE_CC_CACHE_PANIC_GATING : 1; - unsigned int : 1; - unsigned int SPLIT_ALL_FAST_MODE_TRANSFERS : 1; - unsigned int DISABLE_SHADER_BLEND_OPTS : 1; - unsigned int DISABLE_CMASK_LAST_QUAD_INSERTION : 1; - unsigned int DISABLE_ROP3_FIXES_OF_BUG_511967 : 1; - unsigned int DISABLE_ROP3_FIXES_OF_BUG_520657 : 1; - unsigned int DISABLE_OC_FIXES_OF_BUG_522542 : 1; - unsigned int FORCE_RMI_LAST_HIGH : 1; - unsigned int FORCE_RMI_CLKEN_HIGH : 1; - unsigned int DISABLE_EARLY_WRACKS_CC : 1; - unsigned int DISABLE_EARLY_WRACKS_FC : 1; - unsigned int DISABLE_EARLY_WRACKS_DC : 1; - unsigned int DISABLE_EARLY_WRACKS_CM : 1; - unsigned int DISABLE_NACK_PROCESSING_CC : 1; - unsigned int DISABLE_NACK_PROCESSING_FC : 1; - unsigned int DISABLE_NACK_PROCESSING_DC : 1; - unsigned int DISABLE_NACK_PROCESSING_CM : 1; - unsigned int DISABLE_NACK_COLOR_RD_WR_OPT : 1; - unsigned int : 6; - } gfx09_10; - struct { - unsigned int : 30; - unsigned int DISABLE_FMASK_NOFETCH_OPT : 1; - unsigned int : 1; - } gfx10; - struct { - unsigned int : 8; - unsigned int DISABLE_OVERWRITE_COMBINER_TARGET_MASK_VALIDATION : 1; - unsigned int : 18; - unsigned int : 1; - unsigned int : 4; - } gfx101; - struct { - unsigned int : 31; - unsigned int 
DISABLE_FMASK_NOFETCH_OPT_BC : 1; - } gfx10Core; - struct { - unsigned int : 28; - unsigned int DISABLE_DCC_VRS_OPT : 1; - unsigned int : 3; - } gfx10Vrs; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SPARE_5 : 1; - unsigned int : 1; - unsigned int SPARE_6 : 1; - unsigned int SPARE_7 : 1; - unsigned int DISABLE_CC_CACHE_OVWR_STATUS_ACCUM : 1; - unsigned int DISABLE_CC_CACHE_PANIC_GATING : 1; - unsigned int SPLIT_ALL_FAST_MODE_TRANSFERS : 1; - unsigned int DISABLE_SHADER_BLEND_OPTS : 1; - unsigned int : 3; - unsigned int FORCE_RMI_LAST_HIGH : 1; - unsigned int FORCE_RMI_CLKEN_HIGH : 1; - unsigned int DISABLE_EARLY_WRACKS_CC : 1; - unsigned int DISABLE_EARLY_WRACKS_DC : 1; - unsigned int DISABLE_NACK_PROCESSING_CC : 1; - unsigned int DISABLE_NACK_PROCESSING_DC : 1; - unsigned int SPARE_8 : 1; - unsigned int SPARE_9 : 1; - unsigned int : 1; - unsigned int DISABLE_DCC_VRS_OPT : 1; - unsigned int DISABLE_FMASK_NOALLOC_OPT : 1; - unsigned int : 10; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_CONTROL_4 { - struct { - unsigned int COLOR_CACHE_FETCH_NUM_CLS_LOG2 : 3; - unsigned int FMASK_CACHE_FETCH_NUM_CLS_LOG2 : 2; - unsigned int DISABLE_USE_OF_QUAD_SCOREBOARD : 1; - unsigned int : 3; - unsigned int DISABLE_QSB_AA_MODE : 1; - unsigned int DISABLE_QSB_WAIT_FOR_SCORE : 1; - unsigned int DISABLE_QSB_FRAG_GT0 : 1; - unsigned int REVERSE_KEYXFR_RD_PRIORITY : 1; - unsigned int DISABLE_KEYXFR_HIT_RETURNS : 1; - unsigned int : 1; - unsigned int DISABLE_MA_WAIT_FOR_LAST : 1; - unsigned int DISABLE_QSB_SPECULATIVE : 1; - unsigned int QSB_WAIT_FOR_SCORE : 5; - unsigned int DISABLE_TILE_FGCG : 1; - unsigned int DISABLE_LQUAD_FGCG : 1; - unsigned int FC_QSB_FIFO_DEPTH : 8; - } gfx10; - struct { - unsigned int : 6; - unsigned int DISABLE_CMASK_CLOCK_GATING : 1; - unsigned int DISABLE_FMASK_CLOCK_GATING : 1; - unsigned int DISABLE_COLOR_CLOCK_GATING : 1; - unsigned int : 5; - 
unsigned int DISABLE_BC_COLOR_CACHE_PREFETCH : 1; - unsigned int : 17; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int COLOR_CACHE_FETCH_NUM_QB_LOG2 : 3; - unsigned int COLOR_CACHE_FETCH_ALGORITHM : 2; - unsigned int DISABLE_USE_OF_SMT_SCORE : 1; - unsigned int SPARE_10 : 1; - unsigned int SPARE_11 : 1; - unsigned int SPARE_12 : 1; - unsigned int DISABLE_MA_WAIT_FOR_LAST : 1; - unsigned int SMT_TIMEOUT_THRESHOLD : 3; - unsigned int SMT_QPFIFO_THRESHOLD : 3; - unsigned int ENABLE_FRAGOP_STALLING_ON_RAW_HAZARD : 1; - unsigned int ENABLE_FRAGOP_STALLING_ON_COARSE_RAW_HAZARD : 1; - unsigned int ENABLE_FRAGOP_STALLING_ON_DS_RAW_HAZARD : 1; - unsigned int : 13; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_MEM_ARBITER_RD { - struct { - unsigned int MODE : 2; - unsigned int IGNORE_URGENT_AGE : 4; - unsigned int BREAK_GROUP_AGE : 4; - unsigned int WEIGHT_CC : 2; - unsigned int : 20; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int WEIGHT_FC : 2; - unsigned int WEIGHT_CM : 2; - unsigned int WEIGHT_DC : 2; - unsigned int WEIGHT_DECAY_REQS : 2; - unsigned int WEIGHT_DECAY_NOREQS : 2; - unsigned int WEIGHT_IGNORE_NUM_TIDS : 1; - unsigned int SCALE_AGE : 3; - unsigned int SCALE_WEIGHT : 3; - unsigned int SEND_LASTS_WITHIN_GROUPS : 1; - unsigned int : 2; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int WEIGHT_DC : 2; - unsigned int WEIGHT_DECAY_REQS : 2; - unsigned int WEIGHT_DECAY_NOREQS : 2; - unsigned int WEIGHT_IGNORE_NUM_TIDS : 1; - unsigned int SCALE_AGE : 3; - unsigned int SCALE_WEIGHT : 3; - unsigned int SEND_LASTS_WITHIN_GROUPS : 1; - unsigned int : 6; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_HW_MEM_ARBITER_WR { - struct { - unsigned int MODE : 2; - unsigned int IGNORE_URGENT_AGE : 4; - 
unsigned int BREAK_GROUP_AGE : 4; - unsigned int WEIGHT_CC : 2; - unsigned int : 20; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int WEIGHT_FC : 2; - unsigned int WEIGHT_CM : 2; - unsigned int WEIGHT_DC : 2; - unsigned int WEIGHT_DECAY_REQS : 2; - unsigned int WEIGHT_DECAY_NOREQS : 2; - unsigned int WEIGHT_IGNORE_BYTE_MASK : 1; - unsigned int SCALE_AGE : 3; - unsigned int SCALE_WEIGHT : 3; - unsigned int SEND_LASTS_WITHIN_GROUPS : 1; - unsigned int : 2; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int WEIGHT_DC : 2; - unsigned int WEIGHT_DECAY_REQS : 2; - unsigned int WEIGHT_DECAY_NOREQS : 2; - unsigned int WEIGHT_IGNORE_BYTE_MASK : 1; - unsigned int SCALE_AGE : 3; - unsigned int SCALE_WEIGHT : 3; - unsigned int SEND_LASTS_WITHIN_GROUPS : 1; - unsigned int : 6; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_0 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_1 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_2 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_3 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| 
CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_4 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_5 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_6 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CB_KEY_OVERRIDE_7 { - struct { - unsigned int OVERRIDE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union CB_MRT0_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT1_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT2_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT3_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT4_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT5_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT6_EPITCH { - struct { - unsigned int 
EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_MRT7_EPITCH { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 1; - unsigned int PERF_SEL1 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL2 : 9; - unsigned int : 1; - unsigned int PERF_SEL3 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER1_SELECT { - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER2_SELECT { - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER3_SELECT { - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_PERFCOUNTER_FILTER { - struct { 
- unsigned int OP_FILTER_ENABLE : 1; - unsigned int OP_FILTER_SEL : 3; - unsigned int FORMAT_FILTER_ENABLE : 1; - unsigned int FORMAT_FILTER_SEL : 5; - unsigned int CLEAR_FILTER_ENABLE : 1; - unsigned int CLEAR_FILTER_SEL : 1; - unsigned int MRT_FILTER_ENABLE : 1; - unsigned int MRT_FILTER_SEL : 3; - unsigned int : 1; - unsigned int NUM_SAMPLES_FILTER_ENABLE : 1; - unsigned int NUM_SAMPLES_FILTER_SEL : 3; - unsigned int NUM_FRAGMENTS_FILTER_ENABLE : 1; - unsigned int NUM_FRAGMENTS_FILTER_SEL : 2; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_RMI_BC_GL2_CACHE_CONTROL { - struct { - unsigned int CMASK_WR_POLICY : 2; - unsigned int FMASK_WR_POLICY : 2; - unsigned int DCC_WR_POLICY : 2; - unsigned int COLOR_WR_POLICY : 2; - unsigned int : 8; - unsigned int CMASK_RD_POLICY : 2; - unsigned int FMASK_RD_POLICY : 2; - unsigned int DCC_RD_POLICY : 2; - unsigned int COLOR_RD_POLICY : 2; - unsigned int : 7; - unsigned int VOLAT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_RMI_GL2_CACHE_CONTROL { - struct { - unsigned int : 20; - unsigned int DCC_RD_POLICY : 2; - unsigned int COLOR_RD_POLICY : 2; - unsigned int : 7; - unsigned int COLOR_BIG_PAGE : 1; - } bits, bitfields; - struct { - unsigned int CMASK_WR_POLICY : 2; - unsigned int FMASK_WR_POLICY : 2; - unsigned int DCC_WR_POLICY : 2; - unsigned int COLOR_WR_POLICY : 2; - unsigned int : 8; - unsigned int CMASK_RD_POLICY : 2; - unsigned int FMASK_RD_POLICY : 2; - unsigned int : 10; - unsigned int FMASK_BIG_PAGE : 1; - unsigned int : 1; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int DCC_WR_POLICY : 2; - unsigned int COLOR_WR_POLICY : 2; - unsigned int : 28; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 24; - unsigned int CMASK_L3_BYPASS : 1; - unsigned int FMASK_L3_BYPASS : 1; - unsigned int : 6; - } nv21; 
-#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 24; - unsigned int CMASK_L3_BYPASS : 1; - unsigned int FMASK_L3_BYPASS : 1; - unsigned int : 6; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 24; - unsigned int CMASK_L3_BYPASS : 1; - unsigned int FMASK_L3_BYPASS : 1; - unsigned int : 6; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 24; - unsigned int CMASK_L3_BYPASS : 1; - unsigned int FMASK_L3_BYPASS : 1; - unsigned int : 6; - } nv24; -#endif -#if CHIP_HDR_NAVI21|| CHIP_HDR_NAVI22|| CHIP_HDR_NAVI23|| CHIP_HDR_NAVI24|| CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33 - struct { - unsigned int : 26; - unsigned int DCC_L3_BYPASS : 1; - unsigned int COLOR_L3_BYPASS : 1; - unsigned int : 4; - } mall; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_SHADER_MASK { - struct { - unsigned int OUTPUT0_ENABLE : 4; - unsigned int OUTPUT1_ENABLE : 4; - unsigned int OUTPUT2_ENABLE : 4; - unsigned int OUTPUT3_ENABLE : 4; - unsigned int OUTPUT4_ENABLE : 4; - unsigned int OUTPUT5_ENABLE : 4; - unsigned int OUTPUT6_ENABLE : 4; - unsigned int OUTPUT7_ENABLE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_STUTTER_CONTROL_CMASK_RDLAT { - struct { - unsigned int THRESHOLD : 8; - unsigned int TIMEOUT : 8; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_STUTTER_CONTROL_COLOR_RDLAT { - struct { - unsigned int THRESHOLD : 8; - unsigned int TIMEOUT : 8; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_STUTTER_CONTROL_FMASK_RDLAT { - struct { - unsigned int THRESHOLD : 8; - unsigned int TIMEOUT : 8; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CB_TARGET_MASK { - struct { - unsigned int TARGET0_ENABLE : 4; - unsigned int TARGET1_ENABLE : 4; 
- unsigned int TARGET2_ENABLE : 4; - unsigned int TARGET3_ENABLE : 4; - unsigned int TARGET4_ENABLE : 4; - unsigned int TARGET5_ENABLE : 4; - unsigned int TARGET6_ENABLE : 4; - unsigned int TARGET7_ENABLE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER2_LO { - struct { - 
unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHA_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
CHCG_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHCG_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - 
unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CHC_PERFCOUNTER3_SELECT { - struct { - unsigned 
int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_0 { - struct { - unsigned int DEST_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_1 { - struct { - unsigned int DEST_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_2 { - struct { - unsigned int DEST_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_3 { - struct { - unsigned int DEST_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_HI_0 { - struct { - unsigned int DEST_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_HI_1 { - struct { - unsigned int DEST_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_HI_2 { - struct { - unsigned int DEST_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COHER_DEST_BASE_HI_3 { - struct { - unsigned int DEST_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DDID_INDEX { - struct { - unsigned int INDEX : 11; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DESTINATION_EN_SE0 { - struct { - unsigned int CU_EN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DESTINATION_EN_SE1 { - 
struct { - unsigned int CU_EN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DESTINATION_EN_SE2 { - struct { - unsigned int CU_EN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DESTINATION_EN_SE3 { - struct { - unsigned int CU_EN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DIM_X { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DIM_Y { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DIM_Z { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_END { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_ID { - struct { - unsigned int DISPATCH_ID : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_INITIATOR { - struct { - unsigned int COMPUTE_SHADER_EN : 1; - unsigned int PARTIAL_TG_EN : 1; - unsigned int FORCE_START_AT_000 : 1; - unsigned int ORDERED_APPEND_ENBL : 1; - unsigned int ORDERED_APPEND_MODE : 1; - unsigned int USE_THREAD_DIMENSIONS : 1; - unsigned int ORDER_MODE : 1; - unsigned int : 3; - unsigned int SCALAR_L1_INV_VOL : 1; - unsigned int VECTOR_L1_INV_VOL : 1; - unsigned int RESERVED : 1; - unsigned int : 1; - unsigned int RESTORE : 1; - unsigned int : 17; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int TUNNEL_ENABLE : 1; - unsigned int : 1; - unsigned int CS_W32_EN : 1; - unsigned int : 16; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 
16; - unsigned int AMP_SHADER_EN : 1; - unsigned int DISABLE_DISP_PREMPT_EN : 1; - unsigned int : 14; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union COMPUTE_DISPATCH_INTERLEAVE { - struct { - unsigned int INTERLEAVE : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union COMPUTE_DISPATCH_PKT_ADDR_HI { - struct { - unsigned int DATA : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_PKT_ADDR_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_SCRATCH_BASE_HI { - struct { - unsigned int DATA : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_SCRATCH_BASE_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_DISPATCH_TUNNEL { - struct { - unsigned int OFF_DELAY : 10; - unsigned int IMMEDIATE : 1; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_MISC_RESERVED { - struct { - unsigned int : 3; - unsigned int RESERVED3 : 1; - unsigned int RESERVED4 : 1; - unsigned int WAVE_ID_BASE : 12; - unsigned int : 15; - } bits, bitfields; - struct { - unsigned int SEND_SEID : 2; - unsigned int RESERVED2 : 1; - unsigned int : 29; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SEND_SEID : 3; - unsigned int : 29; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_NOWHERE { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_NUM_THREAD_X { - struct { - unsigned int NUM_THREAD_FULL : 16; - unsigned int NUM_THREAD_PARTIAL : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_NUM_THREAD_Y { - struct { - unsigned int NUM_THREAD_FULL : 16; - unsigned int NUM_THREAD_PARTIAL : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_NUM_THREAD_Z { - struct { - unsigned int NUM_THREAD_FULL : 16; - unsigned int NUM_THREAD_PARTIAL : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PERFCOUNT_ENABLE { - struct { - unsigned int PERFCOUNT_ENABLE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PGM_HI { - struct { - unsigned int DATA : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PGM_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PGM_RSRC1 { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int BULKY : 1; - unsigned int CDBG_USER : 1; - unsigned int FP16_OVFL : 1; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int WGP_MODE : 1; - unsigned int MEM_ORDERED : 1; - unsigned int FWD_PROGRESS : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PGM_RSRC2 { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int TGID_X_EN : 1; - unsigned int TGID_Y_EN : 1; - 
unsigned int TGID_Z_EN : 1; - unsigned int TG_SIZE_EN : 1; - unsigned int TIDIG_COMP_CNT : 2; - unsigned int EXCP_EN_MSB : 2; - unsigned int LDS_SIZE : 9; - unsigned int EXCP_EN : 7; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int SKIP_USGPR0 : 1; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PGM_RSRC3 { - struct { - unsigned int SHARED_VGPR_CNT : 4; - unsigned int : 28; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 4; - unsigned int INST_PREF_SIZE : 6; - unsigned int : 22; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 10; - unsigned int TRAP_ON_START : 1; - unsigned int TRAP_ON_END : 1; - unsigned int : 19; - unsigned int IMAGE_OP : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PIPELINESTAT_ENABLE { - struct { - unsigned int PIPELINESTAT_ENABLE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PREF_PRI_ACCUM_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PREF_PRI_ACCUM_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } 
gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PREF_PRI_ACCUM_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_PREF_PRI_ACCUM_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RELAUNCH { - struct { - unsigned int PAYLOAD : 30; - unsigned int IS_EVENT : 1; - unsigned int IS_STATE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RELAUNCH2 { - struct { - unsigned int PAYLOAD : 30; - unsigned int IS_EVENT : 1; - unsigned int IS_STATE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_REQ_CTRL { - struct { - unsigned int SOFT_GROUPING_EN : 1; - unsigned int NUMBER_OF_REQUESTS_PER_CU : 4; - unsigned int SOFT_GROUPING_ALLOCATION_TIMEOUT : 4; - unsigned int HARD_LOCK_HYSTERESIS : 1; - unsigned int HARD_LOCK_LOW_THRESHOLD : 5; - unsigned int PRODUCER_REQUEST_LOCKOUT : 1; - unsigned int GLOBAL_SCANNING_EN : 1; - unsigned int ALLOCATION_RATE_THROTTLING_THRESHOLD : 3; - unsigned int DEDICATED_PREALLOCATION_BUFFER_LIMIT : 7; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RESOURCE_LIMITS { - 
struct { - unsigned int WAVES_PER_SH : 10; - unsigned int : 2; - unsigned int TG_PER_CU : 4; - unsigned int LOCK_THRESHOLD : 6; - unsigned int SIMD_DEST_CNTL : 1; - unsigned int FORCE_SIMD_DIST : 1; - unsigned int CU_GROUP_COUNT : 3; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int SIMD_DISABLE : 4; - unsigned int : 1; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RESTART_X { - struct { - unsigned int RESTART : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RESTART_Y { - struct { - unsigned int RESTART : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_RESTART_Z { - struct { - unsigned int RESTART : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_SHADER_CHKSUM { - struct { - unsigned int CHECKSUM : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_START_X { - struct { - unsigned int START : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_START_Y { - struct { - unsigned int START : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_START_Z { - struct { - unsigned int START : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_STATIC_THREAD_MGMT_SE0 { - struct { - unsigned int SH0_CU_EN : 16; - unsigned int SH1_CU_EN : 16; - } gfx09; - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_STATIC_THREAD_MGMT_SE1 { - struct { - unsigned int SH0_CU_EN : 16; - unsigned int SH1_CU_EN : 16; - } gfx09; - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 
16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_STATIC_THREAD_MGMT_SE2 { - struct { - unsigned int SH0_CU_EN : 16; - unsigned int SH1_CU_EN : 16; - } gfx09; - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_STATIC_THREAD_MGMT_SE3 { - struct { - unsigned int SH0_CU_EN : 16; - unsigned int SH1_CU_EN : 16; - } gfx09; - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union COMPUTE_STATIC_THREAD_MGMT_SE4 { - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union COMPUTE_STATIC_THREAD_MGMT_SE5 { - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union COMPUTE_STATIC_THREAD_MGMT_SE6 { - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union COMPUTE_STATIC_THREAD_MGMT_SE7 { - struct { - unsigned int SA0_CU_EN : 16; - unsigned int SA1_CU_EN : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union COMPUTE_THREADGROUP_ID { - struct { - unsigned int THREADGROUP_ID : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_THREAD_TRACE_ENABLE { 
- struct { - unsigned int THREAD_TRACE_ENABLE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_TMPRING_SIZE { - struct { - unsigned int WAVES : 12; - unsigned int : 20; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int WAVESIZE : 13; - unsigned int : 7; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int WAVESIZE : 15; - unsigned int : 5; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_ACCUM_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_ACCUM_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_ACCUM_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_ACCUM_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_4 { - 
struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_9 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_14 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_USER_DATA_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_VMID { - struct { - unsigned int DATA : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union COMPUTE_WAVE_RESTORE_ADDR_HI { - struct { - unsigned int ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union COMPUTE_WAVE_RESTORE_ADDR_LO { - struct { - unsigned int ADDR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - unsigned int CNTR_MODE1 : 4; - unsigned int CNTR_MODE0 : 4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int CNTR_MODE3 : 4; - unsigned int CNTR_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL2 : 10; - unsigned int CNTR_SEL3 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPC_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - 
unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int CNTR_MODE1 : 4; - unsigned int : 4; - } most; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 28; - unsigned int CNTR_MODE0 : 4; - } gfx09_10; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int CNTR_MODE : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - unsigned int CNTR_MODE1 : 4; - unsigned int CNTR_MODE0 : 4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int CNTR_MODE3 : 4; - unsigned int CNTR_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL2 : 10; - unsigned int CNTR_SEL3 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER1_HI { - struct { - unsigned int 
PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPF_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int CNTR_MODE1 : 4; - unsigned int : 4; - } most; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 28; - unsigned int CNTR_MODE0 : 4; - } gfx09_10; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int CNTR_MODE : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - unsigned int CNTR_MODE1 : 4; - unsigned int CNTR_MODE0 : 4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int CNTR_MODE3 : 4; - unsigned int CNTR_MODE2 : 
4; - } bits, bitfields; - struct { - unsigned int CNTR_SEL2 : 10; - unsigned int CNTR_SEL3 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CPG_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int SPM_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int CNTR_MODE1 : 4; - unsigned int : 4; - } most; - struct { - unsigned int CNTR_SEL0 : 10; - unsigned int CNTR_SEL1 : 10; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 28; - unsigned int CNTR_MODE0 : 4; - } gfx09_10; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx10Core; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int CNTR_MODE : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_BASE { - struct { - unsigned int COHER_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_BASE_HI { - struct { - unsigned int COHER_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_CNTL { - struct { - unsigned int : 3; - unsigned int TC_NC_ACTION_ENA : 1; - unsigned int TC_WC_ACTION_ENA : 1; - unsigned int TC_INV_METADATA_ACTION_ENA : 1; - unsigned int : 9; - unsigned 
int TCL1_VOL_ACTION_ENA : 1; - unsigned int : 2; - unsigned int TC_WB_ACTION_ENA : 1; - unsigned int : 3; - unsigned int TCL1_ACTION_ENA : 1; - unsigned int TC_ACTION_ENA : 1; - unsigned int : 1; - unsigned int CB_ACTION_ENA : 1; - unsigned int DB_ACTION_ENA : 1; - unsigned int SH_KCACHE_ACTION_ENA : 1; - unsigned int SH_KCACHE_VOL_ACTION_ENA : 1; - unsigned int SH_ICACHE_ACTION_ENA : 1; - unsigned int SH_KCACHE_WB_ACTION_ENA : 1; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_SIZE { - struct { - unsigned int COHER_SIZE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_SIZE_HI { - struct { - unsigned int COHER_SIZE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_START_DELAY { - struct { - unsigned int START_DELAY_COUNT : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_COHER_STATUS { - struct { - unsigned int : 24; - unsigned int MEID : 2; - unsigned int : 5; - unsigned int STATUS : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_BASE { - struct { - unsigned int COHER_BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_BASE_HI { - struct { - unsigned int COHER_BASE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_CNTL { - struct { - unsigned int DEST_BASE_0_ENA : 1; - unsigned int DEST_BASE_1_ENA : 1; - unsigned int : 4; - unsigned int CB0_DEST_BASE_ENA : 1; - unsigned int CB1_DEST_BASE_ENA : 1; - unsigned int CB2_DEST_BASE_ENA : 1; - unsigned int CB3_DEST_BASE_ENA : 1; - unsigned int CB4_DEST_BASE_ENA : 1; - unsigned int 
CB5_DEST_BASE_ENA : 1; - unsigned int CB6_DEST_BASE_ENA : 1; - unsigned int CB7_DEST_BASE_ENA : 1; - unsigned int DB_DEST_BASE_ENA : 1; - unsigned int : 4; - unsigned int DEST_BASE_2_ENA : 1; - unsigned int : 1; - unsigned int DEST_BASE_3_ENA : 1; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_SIZE { - struct { - unsigned int COHER_SIZE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_SIZE_HI { - struct { - unsigned int COHER_SIZE_HI_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_ME_COHER_STATUS { - struct { - unsigned int MATCHING_GFX_CNTX : 8; - unsigned int : 23; - unsigned int STATUS : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT0_HI { - struct { - unsigned int NUM_PRIM_NEEDED_CNT0_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT0_LO { - struct { - unsigned int NUM_PRIM_NEEDED_CNT0_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT1_HI { - struct { - unsigned int NUM_PRIM_NEEDED_CNT1_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT1_LO { - struct { - unsigned int NUM_PRIM_NEEDED_CNT1_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT2_HI { - struct { - unsigned int NUM_PRIM_NEEDED_CNT2_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT2_LO { - struct { - unsigned int NUM_PRIM_NEEDED_CNT2_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; 
- float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT3_HI { - struct { - unsigned int NUM_PRIM_NEEDED_CNT3_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_NEEDED_COUNT3_LO { - struct { - unsigned int NUM_PRIM_NEEDED_CNT3_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT0_HI { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT0_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT0_LO { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT0_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT1_HI { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT1_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT1_LO { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT1_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT2_HI { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT2_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT2_LO { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT2_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT3_HI { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT3_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_NUM_PRIM_WRITTEN_COUNT3_LO { - struct { - unsigned int NUM_PRIM_WRITTEN_CNT3_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_PERFMON_CNTL { - struct { - unsigned int PERFMON_STATE : 4; - unsigned int SPM_PERFMON_STATE : 4; - unsigned int 
PERFMON_ENABLE_MODE : 2; - unsigned int PERFMON_SAMPLE_ENABLE : 1; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_PERFMON_CNTX_CNTL { - struct { - unsigned int : 31; - unsigned int PERFMON_ENABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_SC_PSINVOC_COUNT0_HI { - struct { - unsigned int PSINVOC_COUNT0_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_SC_PSINVOC_COUNT0_LO { - struct { - unsigned int PSINVOC_COUNT0_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_SC_PSINVOC_COUNT1_HI { - struct { - unsigned int OBSOLETE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_SC_PSINVOC_COUNT1_LO { - struct { - unsigned int OBSOLETE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_STRMOUT_CNTL { - struct { - unsigned int OFFSET_UPDATE_DONE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CP_VGT_ASINVOC_COUNT_HI { - struct { - unsigned int ASINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union CP_VGT_ASINVOC_COUNT_LO { - struct { - unsigned int ASINVOC_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union CP_VGT_CSINVOC_COUNT_HI { - struct { - unsigned int CSINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_CSINVOC_COUNT_LO { - struct { - unsigned int CSINVOC_COUNT_LO : 32; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_DSINVOC_COUNT_HI { - struct { - unsigned int DSINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_DSINVOC_COUNT_LO { - struct { - unsigned int DSINVOC_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_GSINVOC_COUNT_HI { - struct { - unsigned int GSINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_GSINVOC_COUNT_LO { - struct { - unsigned int GSINVOC_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_GSPRIM_COUNT_HI { - struct { - unsigned int GSPRIM_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_GSPRIM_COUNT_LO { - struct { - unsigned int GSPRIM_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_HSINVOC_COUNT_HI { - struct { - unsigned int HSINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_HSINVOC_COUNT_LO { - struct { - unsigned int HSINVOC_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_IAPRIM_COUNT_HI { - struct { - unsigned int IAPRIM_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_IAPRIM_COUNT_LO { - struct { - unsigned int IAPRIM_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_IAVERT_COUNT_HI { - struct { - unsigned int IAVERT_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_IAVERT_COUNT_LO { - struct { - 
unsigned int IAVERT_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_VSINVOC_COUNT_HI { - struct { - unsigned int VSINVOC_COUNT_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union CP_VGT_VSINVOC_COUNT_LO { - struct { - unsigned int VSINVOC_COUNT_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_ALPHA_TO_MASK { - struct { - unsigned int ALPHA_TO_MASK_ENABLE : 1; - unsigned int : 7; - unsigned int ALPHA_TO_MASK_OFFSET0 : 2; - unsigned int ALPHA_TO_MASK_OFFSET1 : 2; - unsigned int ALPHA_TO_MASK_OFFSET2 : 2; - unsigned int ALPHA_TO_MASK_OFFSET3 : 2; - unsigned int OFFSET_ROUND : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_CGTT_CLK_CTRL_0 { - struct { - unsigned int ON_DELAY : 4; - unsigned int OFF_HYSTERESIS : 8; - unsigned int : 4; - unsigned int SOFT_STALL_OVERRIDE7 : 1; - unsigned int SOFT_STALL_OVERRIDE6 : 1; - unsigned int SOFT_STALL_OVERRIDE5 : 1; - unsigned int SOFT_STALL_OVERRIDE4 : 1; - unsigned int SOFT_STALL_OVERRIDE3 : 1; - unsigned int SOFT_STALL_OVERRIDE2 : 1; - unsigned int SOFT_STALL_OVERRIDE1 : 1; - unsigned int SOFT_STALL_OVERRIDE0 : 1; - unsigned int SOFT_OVERRIDE7 : 1; - unsigned int SOFT_OVERRIDE6 : 1; - unsigned int SOFT_OVERRIDE5 : 1; - unsigned int SOFT_OVERRIDE4 : 1; - unsigned int SOFT_OVERRIDE3 : 1; - unsigned int SOFT_OVERRIDE2 : 1; - unsigned int SOFT_OVERRIDE1 : 1; - unsigned int SOFT_OVERRIDE0 : 1; - } most; - struct { - unsigned int : 12; - unsigned int RESERVED : 4; - unsigned int : 16; - } gfx09; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SOFT_OVERRIDE0 : 1; - unsigned int SOFT_OVERRIDE1 : 1; - unsigned int SOFT_OVERRIDE2 : 1; - unsigned int SOFT_OVERRIDE3 : 1; - unsigned int SOFT_OVERRIDE4 : 1; - unsigned int 
SOFT_OVERRIDE5 : 1; - unsigned int SOFT_OVERRIDE6 : 1; - unsigned int SOFT_OVERRIDE7 : 1; - unsigned int SOFT_OVERRIDE8 : 1; - unsigned int : 23; - } gfx104Plus; -#endif - struct { - unsigned int : 12; - unsigned int RESERVED : 4; - unsigned int : 16; - } gfx10Core; -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 9; - unsigned int RESERVED : 23; - } nv3x; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int RESERVED : 23; - } phx1; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_COUNT_CONTROL { - struct { - unsigned int : 1; - unsigned int PERFECT_ZPASS_COUNTS : 1; - unsigned int : 2; - unsigned int SAMPLE_RATE : 3; - unsigned int : 1; - unsigned int ZPASS_ENABLE : 4; - unsigned int ZFAIL_ENABLE : 4; - unsigned int SFAIL_ENABLE : 4; - unsigned int DBFAIL_ENABLE : 4; - unsigned int SLICE_EVEN_ENABLE : 4; - unsigned int SLICE_ODD_ENABLE : 4; - } bits, bitfields; - struct { - unsigned int ZPASS_INCREMENT_DISABLE : 1; - unsigned int : 31; - } gfx09_10; - struct { - unsigned int : 2; - unsigned int DISABLE_CONSERVATIVE_ZPASS_COUNTS : 1; - unsigned int ENHANCED_CONSERVATIVE_ZPASS_COUNTS : 1; - unsigned int : 28; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_CREDIT_LIMIT { - struct { - unsigned int DB_SC_TILE_CREDITS : 5; - unsigned int DB_SC_QUAD_CREDITS : 5; - unsigned int DB_CB_LQUAD_CREDITS : 3; - unsigned int : 19; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int DB_CB_TILE_CREDITS : 7; - unsigned int : 1; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 13; - unsigned int DB_SC_WAVE_CREDITS : 5; - unsigned int DB_SC_FREE_WAVE_CREDITS : 5; - unsigned int : 9; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_BOUNDS_MAX { - struct { - unsigned int MAX : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_BOUNDS_MIN { - struct { - unsigned int MIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_CLEAR { - struct { - unsigned int DEPTH_CLEAR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_CONTROL { - struct { - unsigned int STENCIL_ENABLE : 1; - unsigned int Z_ENABLE : 1; - unsigned int Z_WRITE_ENABLE : 1; - unsigned int DEPTH_BOUNDS_ENABLE : 1; - unsigned int ZFUNC : 3; - unsigned int BACKFACE_ENABLE : 1; - unsigned int STENCILFUNC : 3; - unsigned int : 9; - unsigned int STENCILFUNC_BF : 3; - unsigned int : 7; - unsigned int ENABLE_COLOR_WRITES_ON_DEPTH_FAIL : 1; - unsigned int DISABLE_COLOR_WRITES_ON_DEPTH_PASS : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_SIZE { - struct { - unsigned int X_MAX : 14; - unsigned int : 2; - unsigned int Y_MAX : 14; - unsigned int : 2; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_SIZE_XY { - struct { - unsigned int X_MAX : 14; - unsigned int : 2; - unsigned int Y_MAX : 14; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DEPTH_VIEW { - struct { - unsigned int SLICE_START : 11; - unsigned int : 2; - unsigned int SLICE_MAX : 11; - unsigned int Z_READ_ONLY : 1; - unsigned int STENCIL_READ_ONLY : 1; - unsigned int MIPID : 4; - unsigned int : 2; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int SLICE_START_HI : 2; - unsigned int : 17; - unsigned int SLICE_MAX_HI : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_CONFIG { - struct { - unsigned int BYPASS_DFSM : 1; - unsigned int DISABLE_PUNCHOUT : 1; - unsigned int DISABLE_POPS : 1; - unsigned int FORCE_FLUSH : 1; - unsigned int : 28; - } 
most; - struct { - unsigned int : 8; - unsigned int MIDDLE_PIPE_MAX_DEPTH : 7; - unsigned int : 17; - } gfx09; - struct { - unsigned int : 16; - unsigned int CAM_WATERMARK : 8; - unsigned int : 8; - } gfx101; - struct { - unsigned int : 14; - unsigned int POPS_INCREMENT_CONTROL : 2; - unsigned int CAM_WATERMARK : 7; - unsigned int FORCE_PUNCHOUT_5BIT_MODE : 1; - unsigned int : 8; - } gfx103; - struct { - unsigned int : 4; - unsigned int SQUAD_WATERMARK : 10; - unsigned int : 10; - unsigned int OUTPUT_WATCHDOG : 8; - } gfx10Core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_CONTROL { - struct { - unsigned int PUNCHOUT_MODE : 2; - unsigned int POPS_DRAIN_PS_ON_OVERLAP : 1; - unsigned int DISALLOW_OVERFLOW : 1; - unsigned int : 28; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_FLUSH_AUX_EVENT { - struct { - unsigned int EVENT_A : 8; - unsigned int EVENT_B : 8; - unsigned int EVENT_C : 8; - unsigned int EVENT_D : 8; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_FLUSH_ENABLE { - struct { - unsigned int : 24; - unsigned int AUX_FORCE_PASSTHRU : 4; - unsigned int AUX_EVENTS : 4; - } most; - struct { - unsigned int PRIMARY_EVENTS : 10; - unsigned int : 22; - } gfx09; - struct { - unsigned int PRIMARY_EVENTS : 11; - unsigned int : 21; - } gfx10Core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_PRIMS_IN_FLIGHT { - struct { - unsigned int HIGH_WATERMARK : 16; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int HARD_LIMIT : 16; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_TILES_IN_FLIGHT { - struct { - unsigned int HIGH_WATERMARK : 16; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int HARD_LIMIT : 16; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_WATCHDOG { - 
struct { - unsigned int TIMER_TARGET : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_DFSM_WATERMARK { - struct { - unsigned int DFSM_HIGH_WATERMARK : 16; - unsigned int POPS_HIGH_WATERMARK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_EQAA { - struct { - unsigned int MAX_ANCHOR_SAMPLES : 3; - unsigned int : 1; - unsigned int PS_ITER_SAMPLES : 3; - unsigned int : 1; - unsigned int MASK_EXPORT_NUM_SAMPLES : 3; - unsigned int : 1; - unsigned int ALPHA_TO_MASK_NUM_SAMPLES : 3; - unsigned int : 1; - unsigned int HIGH_QUALITY_INTERSECTIONS : 1; - unsigned int INCOHERENT_EQAA_READS : 1; - unsigned int INTERPOLATE_COMP_Z : 1; - unsigned int INTERPOLATE_SRC_Z : 1; - unsigned int STATIC_ANCHOR_ASSOCIATIONS : 1; - unsigned int ALPHA_TO_MASK_EQAA_DISABLE : 1; - unsigned int : 2; - unsigned int OVERRASTERIZATION_AMOUNT : 3; - unsigned int ENABLE_POSTZ_OVERRASTERIZATION : 1; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_EQUAD_STUTTER_CONTROL { - struct { - unsigned int THRESHOLD : 8; - unsigned int : 8; - unsigned int TIMEOUT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_ETILE_STUTTER_CONTROL { - struct { - unsigned int THRESHOLD : 8; - unsigned int : 8; - unsigned int TIMEOUT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_EXCEPTION_CONTROL { - struct { - unsigned int EARLY_Z_PANIC_DISABLE : 1; - unsigned int LATE_Z_PANIC_DISABLE : 1; - unsigned int RE_Z_PANIC_DISABLE : 1; - unsigned int : 29; - } bits, bitfields; - struct { - unsigned int : 5; - unsigned int EXTRA_BITS_GROUP_A : 3; - unsigned int : 24; - } most; - struct { - unsigned int : 12; - unsigned int EXTRA_BITS_GROUP_B : 12; - unsigned int : 8; - } gfx101; - struct { - unsigned int : 16; - 
unsigned int FORCE_VRS_RATE_FINE : 8; - unsigned int : 8; - } gfx103Derivative; - struct { - unsigned int : 3; - unsigned int AUTO_FLUSH_HTILE : 1; - unsigned int AUTO_FLUSH_QUAD : 1; - unsigned int : 3; - unsigned int FORCE_SUMMARIZE : 4; - unsigned int : 12; - unsigned int DTAG_WATERMARK : 7; - unsigned int : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_FGCG_INTERFACES_CLK_CTRL { - struct { - unsigned int DB_SC_QUAD_OVERRIDE : 1; - unsigned int : 2; - unsigned int DB_RMI_RDREQ_OVERRIDE : 1; - unsigned int DB_RMI_WRREQ_OVERRIDE : 1; - unsigned int DB_SC_TILE_OVERRIDE : 1; - unsigned int DB_CB_RMIRET_OVERRIDE : 1; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int DB_CB_TILE_OVERRIDE : 1; - unsigned int DB_CB_LQUAD_OVERRIDE : 1; - unsigned int : 29; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 2; - unsigned int DB_CB_EXPORT_OVERRIDE : 1; - unsigned int : 4; - unsigned int DB_SC_WAVE_OVERRIDE : 1; - unsigned int DB_SC_FREE_WAVE_OVERRIDE : 1; - unsigned int : 23; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_FGCG_SRAMS_CLK_CTRL { - struct { - unsigned int OVERRIDE0 : 1; - unsigned int OVERRIDE1 : 1; - unsigned int OVERRIDE2 : 1; - unsigned int OVERRIDE3 : 1; - unsigned int OVERRIDE4 : 1; - unsigned int OVERRIDE5 : 1; - unsigned int OVERRIDE6 : 1; - unsigned int OVERRIDE7 : 1; - unsigned int OVERRIDE8 : 1; - unsigned int OVERRIDE9 : 1; - unsigned int OVERRIDE10 : 1; - unsigned int OVERRIDE11 : 1; - unsigned int OVERRIDE12 : 1; - unsigned int OVERRIDE13 : 1; - unsigned int OVERRIDE14 : 1; - unsigned int OVERRIDE15 : 1; - unsigned int OVERRIDE16 : 1; - unsigned int OVERRIDE17 : 1; - unsigned int OVERRIDE18 : 1; - unsigned int OVERRIDE19 : 1; - unsigned int OVERRIDE20 : 1; - unsigned int OVERRIDE21 : 1; - unsigned int OVERRIDE22 : 1; - unsigned int OVERRIDE23 : 1; 
- unsigned int OVERRIDE24 : 1; - unsigned int OVERRIDE25 : 1; - unsigned int OVERRIDE26 : 1; - unsigned int : 5; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 27; - unsigned int OVERRIDE27 : 1; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int OVERRIDE28 : 1; - unsigned int OVERRIDE29 : 1; - unsigned int OVERRIDE30 : 1; - unsigned int OVERRIDE31 : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_FIFO_DEPTH1 { - struct { - unsigned int DB_RMI_RDREQ_CREDITS : 5; - unsigned int DB_RMI_WRREQ_CREDITS : 5; - unsigned int MCC_DEPTH : 6; - unsigned int QC_DEPTH : 5; - unsigned int LTILE_PROBE_FIFO_DEPTH : 8; - unsigned int : 3; - } gfx09; - struct { - unsigned int MI_RDREQ_FIFO_DEPTH : 8; - unsigned int MI_WRREQ_FIFO_DEPTH : 8; - unsigned int MCC_DEPTH : 8; - unsigned int QC_DEPTH : 8; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_FIFO_DEPTH2 { - struct { - unsigned int EQUAD_FIFO_DEPTH : 8; - unsigned int : 17; - unsigned int LTILE_OP_FIFO_DEPTH : 7; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int ETILE_OP_FIFO_DEPTH : 7; - unsigned int LQUAD_FIFO_DEPTH : 10; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 8; - unsigned int ETILE_OP_FIFO_DEPTH : 8; - unsigned int LQUAD_FIFO_DEPTH : 9; - unsigned int : 7; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_FIFO_DEPTH3 { - struct { - unsigned int LTILE_PROBE_FIFO_DEPTH : 8; - unsigned int : 16; - unsigned int QUAD_READ_REQS : 8; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 8; - unsigned int OSB_WAVE_TABLE_DEPTH : 8; - unsigned int OREO_WAVE_HIDE_DEPTH : 8; - unsigned int : 8; - } gfx11; -#endif 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union DB_FIFO_DEPTH4 { - struct { - unsigned int OSB_SQUAD_TABLE_DEPTH : 8; - unsigned int OSB_TILE_TABLE_DEPTH : 8; - unsigned int OSB_SCORE_BOARD_DEPTH : 8; - unsigned int OSB_EVENT_FIFO_DEPTH : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union DB_FREE_CACHELINES { - struct { - unsigned int FREE_DTILE_DEPTH : 7; - unsigned int FREE_PLANE_DEPTH : 7; - unsigned int FREE_Z_DEPTH : 6; - unsigned int FREE_HTILE_DEPTH : 4; - unsigned int QUAD_READ_REQS : 8; - } gfx09; - struct { - unsigned int FREE_DTILE_DEPTH : 8; - unsigned int FREE_PLANE_DEPTH : 8; - unsigned int FREE_Z_DEPTH : 8; - unsigned int FREE_HTILE_DEPTH : 8; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_HTILE_DATA_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_HTILE_DATA_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_HTILE_SURFACE { - struct { - unsigned int : 1; - unsigned int FULL_CACHE : 1; - unsigned int : 14; - unsigned int DST_OUTSIDE_ZERO_TO_ONE : 1; - unsigned int : 1; - unsigned int PIPE_ALIGNED : 1; - unsigned int : 13; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int HTILE_USES_PRELOAD_WIN : 1; - unsigned int PRELOAD : 1; - unsigned int PREFETCH_WIDTH : 6; - unsigned int PREFETCH_HEIGHT : 6; - unsigned int : 16; - } most; - struct { - unsigned int : 19; - unsigned int RB_ALIGNED : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int RESERVED_FIELD_1 : 1; - unsigned int : 1; - unsigned int RESERVED_FIELD_2 : 1; - unsigned int RESERVED_FIELD_3 : 1; - unsigned int RESERVED_FIELD_4 : 6; - unsigned int 
RESERVED_FIELD_5 : 6; - unsigned int : 1; - unsigned int RESERVED_FIELD_6 : 1; - unsigned int : 14; - } gfx101; - struct { - unsigned int RESERVED_FIELD_1 : 1; - unsigned int : 1; - unsigned int RESERVED_FIELD_2 : 1; - unsigned int RESERVED_FIELD_3 : 1; - unsigned int RESERVED_FIELD_4 : 6; - unsigned int RESERVED_FIELD_5 : 6; - unsigned int : 1; - unsigned int RESERVED_FIELD_6 : 1; - unsigned int : 14; - } gfx103PlusExclusive; - struct { - unsigned int : 19; - unsigned int VRS_HTILE_ENCODING : 2; - unsigned int : 11; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_LAST_OF_BURST_CONFIG { - struct { - unsigned int MAXBURST : 8; - unsigned int TIMEOUT : 3; - unsigned int : 19; - unsigned int DISABLE_RD_BURST : 1; - unsigned int LEGACY_LOB_INSERT_EN : 1; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int DBCB_LOB_SWITCH_TIMEOUT : 7; - unsigned int ENABLE_FG_DEFAULT_TIMEOUT : 1; - unsigned int DISABLE_MCC_BURST_COUNT_RESET_ON_LOB : 1; - unsigned int DISABLE_FLQ_LOB_EVERY_256B : 1; - unsigned int DISABLE_ZCACHE_FL_OP_EVEN_ARB : 1; - unsigned int DISABLE_MCC_BURST_FORCE_FLUSH_BEFORE_FIFO : 1; - unsigned int ENABLE_TIMEOUT_DKG_LOB_GEN : 1; - unsigned int ENABLE_TIMEOUT_LPF_LOB_GEN : 1; - unsigned int ENABLE_TIMEOUT_CB_LOB_GEN : 1; - unsigned int ENABLE_TIMEOUT_FL_BURST : 1; - unsigned int ENABLE_TIMEOUT_FG_LOB_FWDR : 1; - unsigned int ENABLE_TIMEOUT_RD_BA_ACCUM : 1; - unsigned int BYPASS_SORT_RD_BA : 1; - unsigned int : 2; - } most; - struct { - unsigned int : 16; - unsigned int DBCB_LOB_USES_MAXBURST : 1; - unsigned int : 7; - unsigned int ENABLE_TIMEOUT_CB_LOB_GEN : 1; - unsigned int : 2; - unsigned int ENABLE_TIMEOUT_RD_BA_ACCUM : 1; - unsigned int : 4; - } gfx103Derivative; - struct { - unsigned int : 11; - unsigned int DBCB_LOB_SWITCH_TIMEOUT : 5; - unsigned int : 1; - unsigned int ENABLE_FG_DEFAULT_TIMEOUT : 1; - unsigned int DISABLE_MCC_BURST_COUNT_RESET_ON_LOB : 1; - unsigned int 
DISABLE_FLQ_LOB_EVERY_256B : 1; - unsigned int DISABLE_ZCACHE_FL_OP_EVEN_ARB : 1; - unsigned int DISABLE_MCC_BURST_FORCE_FLUSH_BEFORE_FIFO : 1; - unsigned int ENABLE_TIMEOUT_DKG_LOB_GEN : 1; - unsigned int ENABLE_TIMEOUT_LPF_LOB_GEN : 1; - unsigned int : 1; - unsigned int ENABLE_TIMEOUT_FL_BURST : 1; - unsigned int ENABLE_TIMEOUT_FG_LOB_FWDR : 1; - unsigned int : 1; - unsigned int BYPASS_SORT_RD_BA : 1; - unsigned int DISABLE_256B_COALESCE : 1; - unsigned int : 2; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_LQUAD_STUTTER_CONTROL { - struct { - unsigned int THRESHOLD : 8; - unsigned int : 8; - unsigned int TIMEOUT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_LTILE_STUTTER_CONTROL { - struct { - unsigned int THRESHOLD : 8; - unsigned int : 8; - unsigned int TIMEOUT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_MEM_ARB_WATERMARKS { - struct { - unsigned int CLIENT0_WATERMARK : 3; - unsigned int : 5; - unsigned int CLIENT1_WATERMARK : 3; - unsigned int : 5; - unsigned int CLIENT2_WATERMARK : 3; - unsigned int : 5; - unsigned int CLIENT3_WATERMARK : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT0_HI { - struct { - unsigned int COUNT_HI : 31; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT0_LOW { - struct { - unsigned int COUNT_LOW : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT1_HI { - struct { - unsigned int COUNT_HI : 31; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT1_LOW { - struct { - unsigned int COUNT_LOW : 32; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT2_HI { - struct { - unsigned int COUNT_HI : 31; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT2_LOW { - struct { - unsigned int COUNT_LOW : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT3_HI { - struct { - unsigned int COUNT_HI : 31; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_OCCLUSION_COUNT3_LOW { - struct { - unsigned int COUNT_LOW : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union DB_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_PRELOAD_CONTROL { - struct { - unsigned int START_X : 8; - unsigned int START_Y : 8; - unsigned int MAX_X : 8; - unsigned int MAX_Y : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; 
- float f32All; -}; - -union DB_RENDER_CONTROL { - struct { - unsigned int DEPTH_CLEAR_ENABLE : 1; - unsigned int STENCIL_CLEAR_ENABLE : 1; - unsigned int DEPTH_COPY : 1; - unsigned int STENCIL_COPY : 1; - unsigned int RESUMMARIZE_ENABLE : 1; - unsigned int STENCIL_COMPRESS_DISABLE : 1; - unsigned int DEPTH_COMPRESS_DISABLE : 1; - unsigned int COPY_CENTROID : 1; - unsigned int COPY_SAMPLE : 4; - unsigned int DECOMPRESS_ENABLE : 1; - unsigned int : 19; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int : 1; - unsigned int : 18; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 14; - unsigned int : 1; - unsigned int : 1; - unsigned int OREO_MODE : 2; - unsigned int FORCE_OREO_MODE : 1; - unsigned int FORCE_EXPORT_ORDER : 1; - unsigned int MAX_ALLOWED_TILES_IN_WAVE : 4; - unsigned int : 8; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RENDER_OVERRIDE { - struct { - unsigned int FORCE_HIZ_ENABLE : 2; - unsigned int FORCE_HIS_ENABLE0 : 2; - unsigned int FORCE_HIS_ENABLE1 : 2; - unsigned int FORCE_SHADER_Z_ORDER : 1; - unsigned int FAST_Z_DISABLE : 1; - unsigned int FAST_STENCIL_DISABLE : 1; - unsigned int NOOP_CULL_DISABLE : 1; - unsigned int FORCE_COLOR_KILL : 1; - unsigned int FORCE_Z_READ : 1; - unsigned int FORCE_STENCIL_READ : 1; - unsigned int FORCE_FULL_Z_RANGE : 2; - unsigned int : 1; - unsigned int DISABLE_VIEWPORT_CLAMP : 1; - unsigned int IGNORE_SC_ZRANGE : 1; - unsigned int DISABLE_FULLY_COVERED : 1; - unsigned int FORCE_Z_LIMIT_SUMM : 2; - unsigned int MAX_TILES_IN_DTT : 5; - unsigned int DISABLE_TILE_RATE_TILES : 1; - unsigned int FORCE_Z_DIRTY : 1; - unsigned int FORCE_STENCIL_DIRTY : 1; - unsigned int FORCE_Z_VALID : 1; - unsigned int FORCE_STENCIL_VALID : 1; - unsigned int PRESERVE_COMPRESSION : 1; - } bits, bitfields; - struct { - unsigned int : 15; - unsigned int FORCE_QC_SMASK_CONFLICT : 1; - unsigned int : 16; - } 
gfx09_10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RENDER_OVERRIDE2 { - struct { - unsigned int PARTIAL_SQUAD_LAUNCH_CONTROL : 2; - unsigned int PARTIAL_SQUAD_LAUNCH_COUNTDOWN : 3; - unsigned int DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION : 1; - unsigned int DISABLE_SMEM_EXPCLEAR_OPTIMIZATION : 1; - unsigned int DISABLE_COLOR_ON_VALIDATION : 1; - unsigned int DECOMPRESS_Z_ON_FLUSH : 1; - unsigned int DISABLE_REG_SNOOP : 1; - unsigned int DEPTH_BOUNDS_HIER_DEPTH_DISABLE : 1; - unsigned int SEPARATE_HIZS_FUNC_ENABLE : 1; - unsigned int HIZ_ZFUNC : 3; - unsigned int HIS_SFUNC_FF : 3; - unsigned int HIS_SFUNC_BF : 3; - unsigned int PRESERVE_ZRANGE : 1; - unsigned int PRESERVE_SRESULTS : 1; - unsigned int DISABLE_FAST_PASS : 1; - unsigned int : 1; - unsigned int ALLOW_PARTIAL_RES_HIER_KILL : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int CENTROID_COMPUTATION_MODE : 2; - unsigned int : 3; - } gfx103Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 29; - unsigned int DISABLE_NOZ : 1; - unsigned int : 2; - } gfx104Plus; -#endif - struct { - unsigned int : 26; - unsigned int FORCE_VRS_RATE_FINE : 1; - unsigned int : 5; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RESERVED_REG_1 { - struct { - unsigned int FIELD_1 : 11; - unsigned int FIELD_2 : 11; - unsigned int : 10; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RESERVED_REG_2 { - struct { - unsigned int FIELD_1 : 4; - unsigned int FIELD_2 : 4; - unsigned int FIELD_3 : 5; - unsigned int FIELD_4 : 2; - unsigned int FIELD_5 : 2; - unsigned int FIELD_6 : 2; - unsigned int FIELD_7 : 2; - unsigned int : 7; - unsigned int FIELD_8 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RESERVED_REG_3 { - struct { - unsigned int FIELD_1 : 22; - unsigned int : 10; - } most; - 
- unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RING_CONTROL { - struct { - unsigned int COUNTER_CONTROL : 2; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RMI_BC_GL2_CACHE_CONTROL { - struct { - unsigned int Z_WR_POLICY : 2; - unsigned int S_WR_POLICY : 2; - unsigned int HTILE_WR_POLICY : 2; - unsigned int ZPCPSD_WR_POLICY : 2; - unsigned int : 8; - unsigned int Z_RD_POLICY : 2; - unsigned int S_RD_POLICY : 2; - unsigned int HTILE_RD_POLICY : 2; - unsigned int : 9; - unsigned int VOL : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RMI_CACHE_POLICY { - struct { - unsigned int Z_RD : 1; - unsigned int S_RD : 1; - unsigned int HTILE_RD : 1; - unsigned int : 5; - unsigned int Z_WR : 1; - unsigned int S_WR : 1; - unsigned int HTILE_WR : 1; - unsigned int ZPCPSD_WR : 1; - unsigned int : 4; - unsigned int CC_RD : 1; - unsigned int FMASK_RD : 1; - unsigned int CMASK_RD : 1; - unsigned int DCC_RD : 1; - unsigned int : 4; - unsigned int CC_WR : 1; - unsigned int FMASK_WR : 1; - unsigned int CMASK_WR : 1; - unsigned int DCC_WR : 1; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_RMI_L2_CACHE_CONTROL { - struct { - unsigned int Z_WR_POLICY : 2; - unsigned int S_WR_POLICY : 2; - unsigned int HTILE_WR_POLICY : 2; - unsigned int ZPCPSD_WR_POLICY : 2; - unsigned int : 8; - unsigned int Z_RD_POLICY : 2; - unsigned int S_RD_POLICY : 2; - unsigned int HTILE_RD_POLICY : 2; - unsigned int : 2; - unsigned int Z_BIG_PAGE : 1; - unsigned int S_BIG_PAGE : 1; - unsigned int : 6; - } bits, bitfields; -#if CHIP_HDR_NAVI21|| CHIP_HDR_NAVI22|| CHIP_HDR_NAVI23|| CHIP_HDR_NAVI24|| CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33 - struct { - unsigned int : 26; - unsigned int Z_NOALLOC : 1; - unsigned int S_NOALLOC : 1; - unsigned int HTILE_NOALLOC : 1; - unsigned int 
ZPCPSD_NOALLOC : 1; - unsigned int : 2; - } mall; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_SHADER_CONTROL { - struct { - unsigned int Z_EXPORT_ENABLE : 1; - unsigned int STENCIL_TEST_VAL_EXPORT_ENABLE : 1; - unsigned int STENCIL_OP_VAL_EXPORT_ENABLE : 1; - unsigned int : 1; - unsigned int Z_ORDER : 2; - unsigned int KILL_ENABLE : 1; - unsigned int COVERAGE_TO_MASK_ENABLE : 1; - unsigned int MASK_EXPORT_ENABLE : 1; - unsigned int EXEC_ON_HIER_FAIL : 1; - unsigned int EXEC_ON_NOOP : 1; - unsigned int ALPHA_TO_MASK_DISABLE : 1; - unsigned int DEPTH_BEFORE_SHADER : 1; - unsigned int CONSERVATIVE_Z_EXPORT : 2; - unsigned int DUAL_QUAD_DISABLE : 1; - unsigned int PRIMITIVE_ORDERED_PIXEL_SHADER : 1; - unsigned int : 15; - } bits, bitfields; - struct { - unsigned int : 17; - unsigned int EXEC_IF_OVERLAPPED : 1; - unsigned int : 2; - unsigned int POPS_OVERLAP_NUM_SAMPLES : 3; - unsigned int : 9; - } gfx09_10; - struct { - unsigned int : 23; - unsigned int PRE_SHADER_DEPTH_COVERAGE_ENABLE : 1; - unsigned int : 8; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int OREO_BLEND_ENABLE : 1; - unsigned int OVERRIDE_INTRINSIC_RATE_ENABLE : 1; - unsigned int OVERRIDE_INTRINSIC_RATE : 3; - unsigned int : 3; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_SPI_VRS_CENTER_LOCATION { - struct { - unsigned int CENTER_X_OFFSET_1X1 : 4; - unsigned int CENTER_Y_OFFSET_1X1 : 4; - unsigned int CENTER_X_OFFSET_2X1 : 4; - unsigned int CENTER_Y_OFFSET_2X1 : 4; - unsigned int CENTER_X_OFFSET_1X2 : 4; - unsigned int CENTER_Y_OFFSET_1X2 : 4; - unsigned int CENTER_X_OFFSET_2X2 : 4; - unsigned int CENTER_Y_OFFSET_2X2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_SRESULTS_COMPARE_STATE0 { - struct { - unsigned int COMPAREFUNC0 : 3; - unsigned int : 1; - unsigned int 
COMPAREVALUE0 : 8; - unsigned int COMPAREMASK0 : 8; - unsigned int : 4; - unsigned int ENABLE0 : 1; - unsigned int : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_SRESULTS_COMPARE_STATE1 { - struct { - unsigned int COMPAREFUNC1 : 3; - unsigned int : 1; - unsigned int COMPAREVALUE1 : 8; - unsigned int COMPAREMASK1 : 8; - unsigned int : 4; - unsigned int ENABLE1 : 1; - unsigned int : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCILREFMASK { - struct { - unsigned int STENCILTESTVAL : 8; - unsigned int STENCILMASK : 8; - unsigned int STENCILWRITEMASK : 8; - unsigned int STENCILOPVAL : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCILREFMASK_BF { - struct { - unsigned int STENCILTESTVAL_BF : 8; - unsigned int STENCILMASK_BF : 8; - unsigned int STENCILWRITEMASK_BF : 8; - unsigned int STENCILOPVAL_BF : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_CLEAR { - struct { - unsigned int CLEAR : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_CONTROL { - struct { - unsigned int STENCILFAIL : 4; - unsigned int STENCILZPASS : 4; - unsigned int STENCILZFAIL : 4; - unsigned int STENCILFAIL_BF : 4; - unsigned int STENCILZPASS_BF : 4; - unsigned int STENCILZFAIL_BF : 4; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_INFO { - struct { - unsigned int FORMAT : 1; - unsigned int : 3; - unsigned int SW_MODE : 5; - unsigned int : 3; - unsigned int PARTIALLY_RESIDENT : 1; - unsigned int : 14; - unsigned int ALLOW_EXPCLEAR : 1; - unsigned int : 1; - unsigned int TILE_STENCIL_DISABLE : 1; - unsigned int : 2; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int FAULT_BEHAVIOR : 2; - 
unsigned int ITERATE_FLUSH : 1; - unsigned int : 14; - unsigned int CLEAR_DISALLOWED : 1; - unsigned int : 1; - } gfx09; - struct { - unsigned int : 13; - unsigned int RESERVED_FIELD_1 : 3; - unsigned int : 16; - } gfx101; - struct { - unsigned int : 13; - unsigned int RESERVED_FIELD_1 : 3; - unsigned int : 16; - } gfx103PlusExclusive; - struct { - unsigned int : 9; - unsigned int FAULT_BEHAVIOR : 2; - unsigned int ITERATE_FLUSH : 1; - unsigned int : 8; - unsigned int ITERATE_256 : 1; - unsigned int : 11; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_INFO2 { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_READ_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_READ_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_WRITE_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_STENCIL_WRITE_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_SUBTILE_CONTROL { - struct { - unsigned int MSAA1_X : 2; - unsigned int MSAA1_Y : 2; - unsigned int MSAA2_X : 2; - unsigned int MSAA2_Y : 2; - unsigned int MSAA4_X : 2; - unsigned int MSAA4_Y : 2; - unsigned int MSAA8_X : 2; - unsigned int MSAA8_Y : 2; - unsigned int MSAA16_X : 2; - unsigned int MSAA16_Y : 2; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_VRS_OVERRIDE_CNTL { - struct { - unsigned int VRS_OVERRIDE_RATE_COMBINER_MODE : 3; - unsigned int : 
1; - unsigned int VRS_OVERRIDE_RATE_X : 2; - unsigned int VRS_OVERRIDE_RATE_Y : 2; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_WATERMARKS { - struct { - unsigned int DEPTH_FREE : 5; - unsigned int DEPTH_FLUSH : 6; - unsigned int FORCE_SUMMARIZE : 4; - unsigned int DEPTH_PENDING_FREE : 5; - unsigned int DEPTH_CACHELINE_FREE : 8; - unsigned int : 2; - unsigned int AUTO_FLUSH_HTILE : 1; - unsigned int AUTO_FLUSH_QUAD : 1; - } gfx09; - struct { - unsigned int DEPTH_FREE : 8; - unsigned int DEPTH_FLUSH : 8; - unsigned int DEPTH_PENDING_FREE : 8; - unsigned int DEPTH_CACHELINE_FREE : 8; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_ZPASS_COUNT_HI { - struct { - unsigned int COUNT_HI : 31; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_ZPASS_COUNT_LOW { - struct { - unsigned int COUNT_LOW : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_INFO { - struct { - unsigned int FORMAT : 2; - unsigned int NUM_SAMPLES : 2; - unsigned int SW_MODE : 5; - unsigned int : 3; - unsigned int PARTIALLY_RESIDENT : 1; - unsigned int : 3; - unsigned int MAXMIP : 4; - unsigned int : 3; - unsigned int DECOMPRESS_ON_N_ZPLANES : 4; - unsigned int ALLOW_EXPCLEAR : 1; - unsigned int READ_SIZE : 1; - unsigned int TILE_SURFACE_ENABLE : 1; - unsigned int : 1; - unsigned int ZRANGE_PRECISION : 1; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int FAULT_BEHAVIOR : 2; - unsigned int ITERATE_FLUSH : 1; - unsigned int : 14; - unsigned int CLEAR_DISALLOWED : 1; - unsigned int : 1; - } gfx09; - struct { - unsigned int : 13; - unsigned int RESERVED_FIELD_1 : 3; - unsigned int : 16; - } gfx101; - struct { - unsigned int : 13; - unsigned int RESERVED_FIELD_1 : 3; - unsigned int : 16; - } gfx103PlusExclusive; - struct { - unsigned int : 9; - 
unsigned int FAULT_BEHAVIOR : 2; - unsigned int ITERATE_FLUSH : 1; - unsigned int : 8; - unsigned int ITERATE_256 : 1; - unsigned int : 11; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_INFO2 { - struct { - unsigned int EPITCH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_READ_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_READ_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_WRITE_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DB_Z_WRITE_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi0 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi1 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if 
CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi2 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi3 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi4 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { 
- unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi5 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi6 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi7 { - struct { - unsigned int EventSelect_11_8 : 4; - unsigned int : 25; - unsigned int EventSelect_13_12 : 2; - unsigned int BypassEn : 1; - } most; -#if CHIP_HDR_NAVI31 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } phx1; -#endif - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } raphael; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi8 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi9 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi10 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi11 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi12 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi13 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi14 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlHi15 { - struct { - unsigned int EventSelectHi : 6; - unsigned int : 2; - unsigned int Filter : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo0 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - 
struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo1 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned 
int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo2 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 
4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo3 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned 
int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo4 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo5 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned 
int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo6 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 
- struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo7 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int : 2; - unsigned int En : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - 
unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } nv31; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } phx1; -#endif - struct { - unsigned int : 20; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int : 2; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } raphael; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } rembrandt; - struct { - unsigned int : 21; - unsigned int Mp1Int : 1; - unsigned int : 10; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo8 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo9 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo10 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
DF_PIE_AON_PerfMonCtlLo11 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo12 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo13 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo14 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtlLo15 { - struct { - unsigned int EventSelect_7_0 : 8; - unsigned int UnitMaskLo : 8; - unsigned int : 3; - unsigned int DsmTrig : 1; - unsigned int Mp1Int : 1; - unsigned int BypassEn : 1; - unsigned int En : 1; - unsigned int : 1; - unsigned int UnitMaskHi : 4; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
DF_PIE_AON_PerfMonCtrHi0 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi1 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi2 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi3 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi4 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi5 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi6 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi7 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi8 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi9 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi10 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi11 { - struct { - 
unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi12 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi13 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi14 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrHi15 { - struct { - unsigned int CTR_47_32 : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo0 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo1 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo2 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo3 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo4 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo5 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo6 { - struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo7 { - 
struct { - unsigned int CTR_31_0 : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo8 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo9 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo10 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo11 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo12 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo13 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo14 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union DF_PIE_AON_PerfMonCtrLo15 { - struct { - unsigned int CTR_31_0 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GB_ADDR_CONFIG { - struct { - unsigned int NUM_PIPES : 3; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int : 11; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int : 5; - unsigned int NUM_RB_PER_SE : 2; - unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int : 2; - unsigned int NUM_GPUS : 3; - unsigned int 
MULTI_GPU_TILE_SIZE : 2; - unsigned int : 2; - unsigned int ROW_SIZE : 2; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int SE_ENABLE : 1; - } gfx09; - struct { - unsigned int : 8; - unsigned int NUM_PKRS : 3; - unsigned int : 21; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GB_ADDR_CONFIG_READ { - struct { - unsigned int NUM_PIPES : 3; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int : 11; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int : 5; - unsigned int NUM_RB_PER_SE : 2; - unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int : 2; - unsigned int NUM_GPUS : 3; - unsigned int MULTI_GPU_TILE_SIZE : 2; - unsigned int : 2; - unsigned int ROW_SIZE : 2; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int SE_ENABLE : 1; - } gfx09; - struct { - unsigned int : 8; - unsigned int NUM_PKRS : 3; - unsigned int : 21; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union GCEA_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER2_MODE { - struct { - unsigned int COMPARE_MODE0 : 2; - unsigned int COMPARE_MODE1 : 2; - unsigned int COMPARE_MODE2 : 2; - unsigned int COMPARE_MODE3 : 2; - unsigned int COMPARE_VALUE0 : 4; - unsigned int COMPARE_VALUE1 : 4; - unsigned int COMPARE_VALUE2 : 4; - unsigned int COMPARE_VALUE3 : 4; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCEA_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int 
PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER2_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER3_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER4_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER5_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER6_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; 
- unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER7_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCMC_VM_L2_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 1; - unsigned int PERF_SEL1 : 9; - unsigned int : 13; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - 
unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL2 : 9; - unsigned int : 1; - unsigned int PERF_SEL3 : 9; - unsigned int : 13; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCR_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 15; - unsigned int PERF_MODE : 4; - unsigned int CNTL_MODE : 4; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_0_MODE { - struct { - unsigned int COMPARE_MODE0 : 2; - unsigned int COMPARE_MODE1 : 2; - unsigned int COMPARE_MODE2 : 2; - unsigned int COMPARE_MODE3 : 2; - 
unsigned int COMPARE_VALUE0 : 4; - unsigned int COMPARE_VALUE1 : 4; - unsigned int COMPARE_VALUE2 : 4; - unsigned int COMPARE_VALUE3 : 4; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_1_MODE { - struct { - unsigned int COMPARE_MODE0 : 2; - unsigned int COMPARE_MODE1 : 2; - unsigned int COMPARE_MODE2 : 2; - unsigned int COMPARE_MODE3 : 2; - unsigned int COMPARE_VALUE0 : 4; - unsigned int COMPARE_VALUE1 : 4; - unsigned int COMPARE_VALUE2 : 4; - unsigned int COMPARE_VALUE3 : 4; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GCVML2_PERFCOUNTER2_1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - 
unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER2_MODE { - struct { - unsigned int COMPARE_MODE0 : 2; - unsigned int COMPARE_MODE1 : 2; - unsigned int COMPARE_MODE2 : 2; - unsigned int COMPARE_MODE3 : 2; - unsigned int COMPARE_VALUE0 : 4; - unsigned int COMPARE_VALUE1 : 4; - unsigned int COMPARE_VALUE2 : 4; - unsigned int COMPARE_VALUE3 : 4; - unsigned int : 8; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER2_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } most; -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE0 : 4; - } apu11; -#endif - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } gfx101; -#if CHIP_HDR_NAVI32 - struct { - 
unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE0 : 4; - } nv32; -#endif - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } raphael; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GC_ATC_L2_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } 
most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT2 : 10; - unsigned int PERFCOUNTER_SELECT3 : 10; - unsigned int : 12; - } gfx09_0; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union GDS_PERFCOUNTER2_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER3_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GDS_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER0_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER1_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } 
bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER2_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; 
-#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER3_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE1_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; 
- struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER0_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union GE2_DIST_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER1_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
GE2_DIST_PERFCOUNTER2_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER3_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_DIST_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER0_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER1_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| 
CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER2_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union GE2_SE_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER3_SELECT { - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } gfx103; - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 22; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int PERF_MODE0 : 4; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE2_SE_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } gfx103; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_CNTL { - struct { - unsigned int : 19; - unsigned int PACKET_TO_ONE_PA : 1; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int PRIM_GRP_SIZE : 9; - unsigned int VERT_GRP_SIZE : 9; - unsigned int BREAK_WAVE_AT_EOI : 1; - unsigned int : 13; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PRIMS_PER_SUBGRP : 9; - unsigned int VERTS_PER_SUBGRP : 9; - unsigned int BREAK_SUBGRP_AT_EOI : 1; - unsigned int : 1; - unsigned int BREAK_PRIMGRP_AT_EOI : 1; 
- unsigned int PRIM_GRP_SIZE : 9; - unsigned int GCR_DISABLE : 1; - unsigned int DIS_PG_SIZE_ADJUST_FOR_STRIP : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_DMA_FIRST_INDEX { - struct { - unsigned int FIRST_INDEX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_FAST_CLKS { - struct { - unsigned int : 30; - unsigned int LOCK : 1; - unsigned int FORCE_FAST_CLK : 1; - } most; - struct { - unsigned int HYSTERESIS : 30; - unsigned int : 2; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI32 -union GE_FED_STATUS { - struct { - unsigned int DMA_C0_FED_ERROR : 1; - unsigned int DMA_C1_FED_ERROR : 1; - unsigned int TF_REQ_FED_ERROR : 1; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_GS_FAST_LAUNCH_WG_DIM { - struct { - unsigned int GS_FL_DIM_X : 16; - unsigned int GS_FL_DIM_Y : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_GS_FAST_LAUNCH_WG_DIM_1 { - struct { - unsigned int GS_FL_DIM_Z : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union GE_INDX_OFFSET { - struct { - unsigned int INDX_OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_MAX_OUTPUT_PER_SUBGROUP { - struct { - unsigned int MAX_VERTS_PER_SUBGROUP : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_MAX_VTX_INDX { - struct { - unsigned int MAX_INDX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
GE_MIN_VTX_INDX { - struct { - unsigned int MIN_INDX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_MULTI_PRIM_IB_RESET_EN { - struct { - unsigned int RESET_EN : 1; - unsigned int MATCH_ALL_BITS : 1; - unsigned int : 30; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 2; - unsigned int DISABLE_FOR_AUTO_INDEX : 1; - unsigned int : 29; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_NGG_SUBGRP_CNTL { - struct { - unsigned int PRIM_AMP_FACTOR : 9; - unsigned int THDS_PER_SUBGRP : 9; - unsigned int : 14; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_PA_IF_SAFE_REG { - struct { - unsigned int GE_PA_CSB : 10; - unsigned int GE_PA_PAYLOAD : 10; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union GE_PC_ALLOC { - struct { - unsigned int OVERSUB_EN : 1; - unsigned int NUM_PC_LINES : 10; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PC_CNTL { - struct { - unsigned int PC_SIZE : 16; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 18; - unsigned int WAVES_WITH_NO_GRANT : 4; - unsigned int : 10; - } gfx103Derivative; - struct { - unsigned int : 16; - unsigned int EN_GEN_0_1_LATE_ALLOC : 1; - unsigned int : 15; - } gfx10Core; - struct { - unsigned int : 17; - unsigned int NO_RESERVATION_EN : 1; - unsigned int : 14; - } gfx10Vrs; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER0_LO { - struct { - unsigned int 
PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } most; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE0 : 4; - unsigned int PERF_MODE1 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union GE_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER6_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER6_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER6_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER7_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER7_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER7_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER8_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER8_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER8_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER9_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER9_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER9_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER10_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER10_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER10_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER11_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER11_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PERFCOUNTER11_SELECT { - struct { - unsigned int PERF_SEL0 : 10; - unsigned int : 18; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_PRIV_CONTROL { - struct { - unsigned int : 1; - unsigned int CLAMP_PRIMGRP_SIZE : 9; - unsigned int RESET_ON_PIPELINE_CHANGE : 1; - unsigned int : 21; - } bits, bitfields; - struct { - unsigned int DISCARD_LEGACY : 1; - unsigned int : 31; - } gfx10; - struct { - unsigned int : 15; - unsigned int FGCG_OVERRIDE : 1; - unsigned int CLAMP_HS_OFFCHIP_PER_SE_OVERRIDE : 1; - unsigned int : 15; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int RESERVED : 1; - unsigned int : 16; - unsigned int DISABLE_ACCUM_AGM : 1; - unsigned int : 14; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| 
CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_RATE_CNTL_1 { - struct { - unsigned int ADD_X_CLKS_LS_VERT : 4; - unsigned int AFTER_Y_TRANS_LS_VERT : 4; - unsigned int ADD_X_CLKS_HS_VERT : 4; - unsigned int AFTER_Y_TRANS_HS_VERT : 4; - unsigned int ADD_X_CLKS_ES_VERT : 4; - unsigned int AFTER_Y_TRANS_ES_VERT : 4; - unsigned int ADD_X_CLKS_GS_PRIM : 4; - unsigned int AFTER_Y_TRANS_GS_PRIM : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_RATE_CNTL_2 { - struct { - unsigned int ADD_X_CLKS_VS_VERT : 4; - unsigned int AFTER_Y_TRANS_VS_VERT : 4; - unsigned int ADD_X_CLKS_PA_PRIM : 4; - unsigned int AFTER_Y_TRANS_PA_PRIM : 4; - unsigned int ADD_X_CLKS_MERGED_HS_GS : 4; - unsigned int ADD_X_CLKS_MERGED_LS_ES : 4; - unsigned int MERGED_HS_GS_MODE : 1; - unsigned int MERGED_LS_ES_MODE : 1; - unsigned int ENABLE_RATE_CNTL : 1; - unsigned int SWAP_PRIORITY : 1; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union GE_SPI_IF_SAFE_REG { - struct { - unsigned int GE_SPI_LS_ES_DATA : 6; - unsigned int GE_SPI_HS_GS_DATA : 6; - unsigned int GE_SPI_GRP : 6; - unsigned int : 14; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union GE_STATUS { - struct { - unsigned int PERFCOUNTER_STATUS : 1; - unsigned int THREAD_TRACE_STATUS : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_STEREO_CNTL { - struct { - unsigned int RT_SLICE : 3; - unsigned int VIEWPORT : 4; - unsigned int : 1; - unsigned int EN_STEREO : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_USER_VGPR1 { - struct { - unsigned int 
DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_USER_VGPR2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_USER_VGPR3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_USER_VGPR_EN { - struct { - unsigned int EN_USER_VGPR1 : 1; - unsigned int EN_USER_VGPR2 : 1; - unsigned int EN_USER_VGPR3 : 1; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GE_VRS_RATE { - struct { - unsigned int RATE_X : 2; - unsigned int : 2; - unsigned int RATE_Y : 2; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 
32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1A_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - 
unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL1C_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
GL1C_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2A_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - 
unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GL2C_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
GL2C_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_CHIP_REVISION { - struct { - unsigned int CHIP_REVISION : 8; - unsigned int : 24; - } core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_GFX_INDEX { - struct { - unsigned int INSTANCE_INDEX : 8; - unsigned int : 8; - unsigned int SE_INDEX : 8; - unsigned int : 6; - unsigned int INSTANCE_BROADCAST_WRITES : 1; - unsigned int SE_BROADCAST_WRITES : 1; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int SH_INDEX : 8; - unsigned int : 13; - unsigned int SH_BROADCAST_WRITES : 1; - unsigned int : 2; - } gfx09; - struct { - unsigned int : 8; - unsigned int SA_INDEX : 8; - unsigned int : 13; - unsigned int SA_BROADCAST_WRITES : 1; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_GFX_INDEX_SR_DATA { - struct { - unsigned int INSTANCE_INDEX : 8; - unsigned int : 8; - unsigned int SE_INDEX : 8; - unsigned int : 6; - unsigned int INSTANCE_BROADCAST_WRITES : 1; - unsigned int SE_BROADCAST_WRITES : 1; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int SH_INDEX : 8; - unsigned int : 13; - unsigned int SH_BROADCAST_WRITES : 1; - unsigned int : 2; - } gfx09; - struct { - unsigned int : 8; - unsigned int SA_INDEX : 8; - unsigned int : 13; - unsigned int SA_BROADCAST_WRITES : 1; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_GFX_INDEX_SR_SELECT { - struct { - unsigned int INDEX : 3; - unsigned int : 29; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int VF_PF : 1; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER0_HI { - struct { - unsigned 
int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int GRBM_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CP_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int GDS_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RLC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 2; - unsigned int UTCL2_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - } bits, bitfields; - struct { - unsigned int : 30; - unsigned int EA_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - } core; - struct { - unsigned int : 12; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 10; - unsigned int IA_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - unsigned int TC_BUSY_USER_DEFINED_MASK : 1; - unsigned int WD_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 27; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GE_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER0_SELECT_HI { - struct { - unsigned int : 1; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int 
GL2CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SDMA_BUSY_USER_DEFINED_MASK : 1; - unsigned int CH_BUSY_USER_DEFINED_MASK : 1; - unsigned int PH_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 2; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 23; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int PMM_BUSY_USER_DEFINED_MASK : 1; - unsigned int GUS_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 24; - } gfx10CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 22; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int GRBM_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CP_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int GDS_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RLC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 2; - unsigned int UTCL2_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - } bits, 
bitfields; - struct { - unsigned int : 30; - unsigned int EA_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - } core; - struct { - unsigned int : 12; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 10; - unsigned int IA_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - unsigned int TC_BUSY_USER_DEFINED_MASK : 1; - unsigned int WD_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 27; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GE_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_PERFCOUNTER1_SELECT_HI { - struct { - unsigned int : 1; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL2CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SDMA_BUSY_USER_DEFINED_MASK : 1; - unsigned int CH_BUSY_USER_DEFINED_MASK : 1; - unsigned int PH_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 2; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 23; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int PMM_BUSY_USER_DEFINED_MASK : 1; - unsigned int GUS_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 24; - } gfx10CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 22; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE0_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE0_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE0_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int 
CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 9; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 6; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 26; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 4; - } gfx11; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv31; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv32; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE1_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE1_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE1_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int 
TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 9; - } bits, bitfields; - struct { - unsigned int : 19; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 6; - } gfx10CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 26; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 4; - } gfx11; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv31; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv32; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE2_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE2_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE2_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - 
unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 19; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 6; - } gfx10Core; -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv31; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 28; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv32; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 4; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE3_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE3_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GRBM_SE3_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int 
SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 9; - } most; - struct { - unsigned int : 19; - unsigned int VGT_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 6; - } gfx10Core; -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv31; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 23; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } nv32; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31 -union GRBM_SE4_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE4_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union 
GRBM_SE4_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE5_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE5_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE5_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int 
PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE6_PERFCOUNTER_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE6_PERFCOUNTER_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union GRBM_SE6_PERFCOUNTER_SELECT { - struct { - unsigned int PERF_SEL : 6; - unsigned int : 4; - unsigned int DB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int CB_CLEAN_USER_DEFINED_MASK : 1; - unsigned int TA_BUSY_USER_DEFINED_MASK : 1; - unsigned int SX_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int SPI_BUSY_USER_DEFINED_MASK : 1; - unsigned int SC_BUSY_USER_DEFINED_MASK : 1; - unsigned int DB_BUSY_USER_DEFINED_MASK : 1; - unsigned int CB_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 1; - unsigned int PA_BUSY_USER_DEFINED_MASK : 1; - unsigned int BCI_BUSY_USER_DEFINED_MASK : 1; - unsigned int RMI_BUSY_USER_DEFINED_MASK : 1; - unsigned int UTCL1_BUSY_USER_DEFINED_MASK : 1; - unsigned int TCP_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1CC_BUSY_USER_DEFINED_MASK : 1; - unsigned int GL1H_BUSY_USER_DEFINED_MASK : 1; - unsigned int PC_BUSY_USER_DEFINED_MASK : 1; - unsigned int SEDC_BUSY_USER_DEFINED_MASK : 1; - unsigned int : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - 
-union GUS_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER2_MODE { - struct { - unsigned int COMPARE_MODE0 : 2; - unsigned int COMPARE_MODE1 : 2; - unsigned int COMPARE_MODE2 : 2; - unsigned int COMPARE_MODE3 : 2; - unsigned int COMPARE_VALUE0 : 4; - unsigned int COMPARE_VALUE1 : 4; - unsigned int COMPARE_VALUE2 : 4; - unsigned int COMPARE_VALUE3 : 4; - unsigned int : 8; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } most; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union GUS_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_CNTL_STATUS { - struct { - unsigned int IA_BUSY : 1; - unsigned int IA_DMA_BUSY : 1; - unsigned int IA_DMA_REQ_BUSY : 1; - unsigned int IA_GRP_BUSY : 1; - unsigned int IA_ADC_BUSY : 1; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_ENHANCE { - struct { - unsigned int MISC : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_MULTI_VGT_PARAM { - struct { - unsigned int PRIMGROUP_SIZE : 16; - unsigned int PARTIAL_VS_WAVE_ON : 1; - unsigned int SWITCH_ON_EOP : 1; - unsigned int PARTIAL_ES_WAVE_ON : 1; - unsigned int SWITCH_ON_EOI : 1; - unsigned int WD_SWITCH_ON_EOP : 1; - unsigned int : 11; - } bits, bitfields; - struct { - unsigned int : 21; - unsigned int EN_INST_OPT_BASIC : 1; - unsigned int EN_INST_OPT_ADV : 1; - unsigned int HW_USE_ONLY : 1; - unsigned int : 8; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_MULTI_VGT_PARAM_BC { - struct { - unsigned int RESERVED : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_MULTI_VGT_PARAM_PIPED { - struct { - unsigned int PRIMGROUP_SIZE : 16; - unsigned int PARTIAL_VS_WAVE_ON : 1; - unsigned int SWITCH_ON_EOP : 1; - unsigned int PARTIAL_ES_WAVE_ON : 1; - unsigned int SWITCH_ON_EOI : 1; - unsigned int WD_SWITCH_ON_EOP : 1; 
- unsigned int EN_INST_OPT_BASIC : 1; - unsigned int EN_INST_OPT_ADV : 1; - unsigned int HW_USE_ONLY : 1; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
IA_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_UTCL1_CNTL { - struct { - unsigned int XNACK_REDO_TIMER_CNT : 20; - unsigned int : 3; - unsigned int VMID_RESET_MODE : 1; - unsigned int DROP_MODE : 1; - unsigned int BYPASS : 1; - unsigned int INVALIDATE : 1; - unsigned int FRAG_LIMIT_MODE : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int : 3; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int FORCE_SD_VMID_DIRTY : 1; - unsigned int : 2; - } gfx09; - struct { - unsigned int : 29; - unsigned int MTYPE_OVERRIDE : 1; - unsigned int : 2; - } gfx10Plus; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv24; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv3x; -#endif - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union IA_UTCL1_STATUS { - struct { - unsigned int FAULT_DETECTED : 1; - unsigned int RETRY_DETECTED : 1; - unsigned int PRT_DETECTED : 1; - unsigned int : 5; - unsigned int FAULT_UTCL1ID : 6; - unsigned int : 2; - unsigned int RETRY_UTCL1ID : 6; - unsigned int : 2; - unsigned int PRT_UTCL1ID : 6; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union IA_UTCL1_STATUS_2 { - struct { - unsigned int IA_BUSY : 1; - unsigned int IA_DMA_BUSY : 1; - unsigned int IA_DMA_REQ_BUSY : 1; - unsigned int IA_GRP_BUSY : 1; - unsigned int IA_ADC_BUSY : 1; - unsigned int FAULT_DETECTED : 1; - unsigned int RETRY_DETECTED : 1; - unsigned int PRT_DETECTED : 1; - unsigned int FAULT_UTCL1ID : 6; - unsigned int : 2; - unsigned int RETRY_UTCL1ID : 6; - unsigned int : 2; - unsigned int PRT_UTCL1ID : 6; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER2_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
MC_VM_L2_PERFCOUNTER3_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER4_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER5_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER6_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER7_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MC_VM_L2_PERFCOUNTER_RSLT_CNTL { - 
struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union MP1_SMN_FPS_CNT { - struct { - unsigned int COUNT : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_CLIP_CNTL { - struct { - unsigned int UCP_ENA_0 : 1; - unsigned int UCP_ENA_1 : 1; - unsigned int UCP_ENA_2 : 1; - unsigned int UCP_ENA_3 : 1; - unsigned int UCP_ENA_4 : 1; - unsigned int UCP_ENA_5 : 1; - unsigned int : 7; - unsigned int PS_UCP_Y_SCALE_NEG : 1; - unsigned int PS_UCP_MODE : 2; - unsigned int CLIP_DISABLE : 1; - unsigned int UCP_CULL_ONLY_ENA : 1; - unsigned int BOUNDARY_EDGE_FLAG_ENA : 1; - unsigned int DX_CLIP_SPACE_DEF : 1; - unsigned int DIS_CLIP_ERR_DETECT : 1; - unsigned int VTX_KILL_OR : 1; - unsigned int DX_RASTERIZATION_KILL : 1; - unsigned int : 1; - unsigned int DX_LINEAR_ATTR_CLIP_ENA : 1; - unsigned int VTE_VPORT_PROVOKE_DISABLE : 1; - unsigned int ZCLIP_NEAR_DISABLE : 1; - unsigned int ZCLIP_FAR_DISABLE : 1; - unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int ZCLIP_PROG_NEAR_ENA : 1; - unsigned int : 3; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_CNTL_STATUS { - struct { - unsigned int UTC_FAULT_DETECTED : 1; - unsigned int UTC_RETRY_DETECTED : 1; - unsigned int UTC_PRT_DETECTED : 1; - unsigned int : 29; - } most; - struct { - unsigned int : 31; - unsigned int CL_BUSY : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_ENHANCE { - struct { - unsigned int CLIP_VTX_REORDER_ENA : 1; - unsigned int NUM_CLIP_SEQ : 2; - unsigned int CLIPPED_PRIM_SEQ_STALL : 1; - unsigned int VE_NAN_PROC_DISABLE : 1; - unsigned int 
XTRA_DEBUG_REG_SEL : 1; - unsigned int IGNORE_PIPELINE_RESET : 1; - unsigned int KILL_INNER_EDGE_FLAGS : 1; - unsigned int NGG_PA_TO_ALL_SC : 1; - unsigned int TC_LATENCY_TIME_STAMP_RESOLUTION : 2; - unsigned int NGG_BYPASS_PRIM_FILTER : 1; - unsigned int NGG_SIDEBAND_MEMORY_DEPTH : 2; - unsigned int NGG_PRIM_INDICES_FIFO_DEPTH : 3; - unsigned int : 11; - unsigned int ECO_SPARE3 : 1; - unsigned int ECO_SPARE2 : 1; - unsigned int ECO_SPARE1 : 1; - unsigned int ECO_SPARE0 : 1; - } bits, bitfields; - struct { - unsigned int : 18; - unsigned int OUTPUT_SWITCH_TO_LEGACY_EVENT : 1; - unsigned int NO_SWITCH_TO_LEGACY_AFTER_VMID_RESET : 1; - unsigned int POLY_INNER_EDGE_FLAG_DISABLE : 1; - unsigned int TC_REQUEST_PERF_CNTR_ENABLE : 1; - unsigned int : 10; - } most; - struct { - unsigned int : 17; - unsigned int PROG_NEAR_CLIP_PLANE_ENABLE : 1; - unsigned int : 14; - } gfx09_1xPlus; - struct { - unsigned int : 22; - unsigned int DISABLE_PA_PH_INTF_FINE_CLOCK_GATE : 1; - unsigned int : 9; - } gfx10; - struct { - unsigned int : 23; - unsigned int : 1; - unsigned int : 8; - } gfx101; - struct { - unsigned int : 24; - unsigned int CLAMP_NEGATIVE_BB_TO_ZERO : 1; - unsigned int : 7; - } gfx103Derivative; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 18; - unsigned int POLY_INNER_EDGE_FLAG_DISABLE : 1; - unsigned int TC_REQUEST_PERF_CNTR_ENABLE : 1; - unsigned int DISABLE_PA_PH_INTF_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_PA_SX_REQ_INTF_FINE_CLOCK_GATE : 1; - unsigned int ENABLE_PA_RATE_CNTL : 1; - unsigned int CLAMP_NEGATIVE_BB_TO_ZERO : 1; - unsigned int : 8; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_GB_HORZ_CLIP_ADJ { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_GB_HORZ_DISC_ADJ { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_GB_VERT_CLIP_ADJ { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_GB_VERT_DISC_ADJ { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_NANINF_CNTL { - struct { - unsigned int VTE_XY_INF_DISCARD : 1; - unsigned int VTE_Z_INF_DISCARD : 1; - unsigned int VTE_W_INF_DISCARD : 1; - unsigned int VTE_0XNANINF_IS_0 : 1; - unsigned int VTE_XY_NAN_RETAIN : 1; - unsigned int VTE_Z_NAN_RETAIN : 1; - unsigned int VTE_W_NAN_RETAIN : 1; - unsigned int VTE_W_RECIP_NAN_IS_0 : 1; - unsigned int VS_XY_NAN_TO_INF : 1; - unsigned int VS_XY_INF_RETAIN : 1; - unsigned int VS_Z_NAN_TO_INF : 1; - unsigned int VS_Z_INF_RETAIN : 1; - unsigned int VS_W_NAN_TO_INF : 1; - unsigned int VS_W_INF_RETAIN : 1; - unsigned int VS_CLIP_DIST_INF_DISCARD : 1; - unsigned int : 5; - unsigned int VTE_NO_OUTPUT_NEG_0 : 1; - unsigned int : 11; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_NGG_CNTL { - struct { - unsigned int VERTEX_REUSE_OFF : 1; - unsigned int INDEX_BUF_EDGE_FLAG_ENA : 1; - unsigned int : 30; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int VERTEX_REUSE_DEPTH : 8; - unsigned int : 22; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_POINT_CULL_RAD { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_POINT_SIZE { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_POINT_X_RAD { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union PA_CL_POINT_Y_RAD { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_PROG_NEAR_CLIP_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_0_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_0_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_0_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_0_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_1_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_1_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_1_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_1_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_2_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_2_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_2_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_2_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_3_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_3_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_3_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_3_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_4_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_4_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_4_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_4_Z { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_5_W { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_5_X { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_5_Y { - struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_UCP_5_Z { - 
struct { - unsigned int DATA_REGISTER : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_1 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_2 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_3 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_4 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_5 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_6 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_7 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_8 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_9 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_10 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_11 { - struct 
{ - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_12 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_13 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_14 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XOFFSET_15 { - struct { - unsigned int VPORT_XOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_1 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_2 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_3 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_4 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_5 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_6 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_7 { - struct { - unsigned int 
VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_8 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_9 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_10 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_11 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_12 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_13 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_14 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_XSCALE_15 { - struct { - unsigned int VPORT_XSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_1 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_2 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_3 { - struct { - unsigned int VPORT_YOFFSET : 32; 
- } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_4 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_5 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_6 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_7 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_8 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_9 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_10 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_11 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_12 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_13 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_14 { - struct { - unsigned int VPORT_YOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YOFFSET_15 { - struct { - unsigned int VPORT_YOFFSET : 32; - 
} bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_1 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_2 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_3 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_4 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_5 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_6 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_7 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_8 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_9 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_10 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_11 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_12 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_13 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_14 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_YSCALE_15 { - struct { - unsigned int VPORT_YSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_1 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_2 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_3 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_4 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_5 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_6 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_7 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_8 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_9 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_10 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_11 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_12 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_13 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_14 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZOFFSET_15 { - struct { - unsigned int VPORT_ZOFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_1 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_2 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_3 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_4 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_5 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_6 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_7 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_8 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_9 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_10 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_11 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_12 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_13 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_14 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VPORT_ZSCALE_15 { - struct { - unsigned int VPORT_ZSCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union PA_CL_VRS_CNTL { - struct { - unsigned int VERTEX_RATE_COMBINER_MODE : 3; - unsigned int PRIMITIVE_RATE_COMBINER_MODE : 3; - unsigned int HTILE_RATE_COMBINER_MODE : 3; - unsigned int SAMPLE_ITER_COMBINER_MODE : 3; - unsigned int : 1; - unsigned int EXPOSE_VRS_PIXELS_MASK : 1; - unsigned int CMASK_RATE_HINT_FORCE_ZERO : 1; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VS_OUT_CNTL { - struct { - unsigned int CLIP_DIST_ENA_0 : 1; - unsigned int CLIP_DIST_ENA_1 : 1; - unsigned int CLIP_DIST_ENA_2 : 1; - unsigned int CLIP_DIST_ENA_3 : 1; - unsigned int CLIP_DIST_ENA_4 : 1; - unsigned int CLIP_DIST_ENA_5 : 1; - unsigned int CLIP_DIST_ENA_6 : 1; - unsigned int CLIP_DIST_ENA_7 : 1; - unsigned int CULL_DIST_ENA_0 : 1; - unsigned int CULL_DIST_ENA_1 : 1; - unsigned int CULL_DIST_ENA_2 : 1; - unsigned int CULL_DIST_ENA_3 : 1; - unsigned int CULL_DIST_ENA_4 : 1; - unsigned int CULL_DIST_ENA_5 : 1; - unsigned int CULL_DIST_ENA_6 : 1; - unsigned int CULL_DIST_ENA_7 : 1; - unsigned int USE_VTX_POINT_SIZE : 1; - unsigned int USE_VTX_EDGE_FLAG : 1; - unsigned int USE_VTX_RENDER_TARGET_INDX : 1; - unsigned int USE_VTX_VIEWPORT_INDX : 1; - unsigned int USE_VTX_KILL_FLAG : 1; - unsigned int VS_OUT_MISC_VEC_ENA : 1; - unsigned int VS_OUT_CCDIST0_VEC_ENA : 1; - unsigned int VS_OUT_CCDIST1_VEC_ENA : 1; - unsigned int VS_OUT_MISC_SIDE_BUS_ENA : 1; - unsigned int : 7; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int : 1; - unsigned int : 5; - } most; - struct { - unsigned int : 26; - unsigned int USE_VTX_LINE_WIDTH : 1; - unsigned int : 1; - unsigned int : 4; - } gfx09; - struct { - unsigned int : 25; - unsigned int USE_VTX_GS_CUT_FLAG : 1; - unsigned int : 6; - } gfx09_10; - struct { - unsigned int : 28; - unsigned int USE_VTX_VRS_RATE : 1; - unsigned int BYPASS_VTX_RATE_COMBINER : 1; - unsigned int BYPASS_PRIM_RATE_COMBINER : 1; - unsigned int : 1; - } gfx103Plus; - struct { - 
unsigned int : 27; - unsigned int USE_VTX_LINE_WIDTH : 1; - unsigned int : 4; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 31; - unsigned int USE_VTX_FSR_SELECT : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_CL_VTE_CNTL { - struct { - unsigned int VPORT_X_SCALE_ENA : 1; - unsigned int VPORT_X_OFFSET_ENA : 1; - unsigned int VPORT_Y_SCALE_ENA : 1; - unsigned int VPORT_Y_OFFSET_ENA : 1; - unsigned int VPORT_Z_SCALE_ENA : 1; - unsigned int VPORT_Z_OFFSET_ENA : 1; - unsigned int : 2; - unsigned int VTX_XY_FMT : 1; - unsigned int VTX_Z_FMT : 1; - unsigned int VTX_W0_FMT : 1; - unsigned int PERFCOUNTER_REF : 1; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_ENHANCE { - struct { - unsigned int ECO_SPARE0 : 1; - unsigned int ECO_SPARE1 : 1; - unsigned int ECO_SPARE2 : 1; - unsigned int ECO_SPARE3 : 1; - unsigned int DISABLE_PH_SC_INTF_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_FOPKT : 1; - unsigned int DISABLE_FOPKT_SCAN_POST_RESET : 1; - unsigned int DISABLE_PH_SC_INTF_CLKEN_CLOCK_GATE : 1; - unsigned int : 1; - unsigned int DISABLE_PH_PERF_REG_FGCG : 1; - unsigned int ENABLE_PH_INTF_CLKEN_STRETCH : 3; - unsigned int : 19; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int DISABLE_USE_LAST_PH_ARBITER_PERFCOUNTER_SAMPLE_EVENT : 1; - unsigned int : 18; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 14; - unsigned int USE_PERFCOUNTER_START_STOP_EVENTS : 1; - unsigned int FORCE_PH_PERFCOUNTER_SAMPLE_ENABLE_ON : 1; - unsigned int : 16; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 16; - unsigned int PH_SPI_GE_THROTTLE_MODE : 1; - unsigned int PH_SPI_GE_THROTTLE_MODE_DISABLE : 1; - unsigned int 
PH_SPI_GE_THROTTLE_PERFCOUNTER_COUNT_MODE : 1; - unsigned int : 13; - } gfx11; -#endif - struct { - unsigned int : 14; - unsigned int USE_PERFCOUNTER_START_STOP_EVENTS : 1; - unsigned int FORCE_PH_PERFCOUNTER_SAMPLE_ENABLE_ON : 1; - unsigned int : 16; - } raphael; - struct { - unsigned int : 14; - unsigned int USE_PERFCOUNTER_START_STOP_EVENTS : 1; - unsigned int FORCE_PH_PERFCOUNTER_SAMPLE_ENABLE_ON : 1; - unsigned int : 16; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_INTERFACE_FIFO_SIZE { - struct { - unsigned int PA_PH_IF_FIFO_SIZE : 10; - unsigned int : 6; - unsigned int PH_SC_IF_FIFO_SIZE : 6; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union PA_PH_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int 
PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER6_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER6_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER6_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER7_HI { - struct { - unsigned 
int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER7_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_PH_PERFCOUNTER7_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_RATE_CNTL { -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int VERTEX_RATE : 4; - unsigned int PRIM_RATE : 4; - unsigned int : 24; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PA_SC_AA_CONFIG { - struct { - unsigned int MSAA_NUM_SAMPLES : 3; - unsigned int : 1; - unsigned int AA_MASK_CENTROID_DTMN : 1; - unsigned int : 8; - unsigned int MAX_SAMPLE_DIST : 4; - unsigned int : 3; - unsigned int MSAA_EXPOSED_SAMPLES : 3; - unsigned int : 1; - unsigned int DETAIL_TO_EXPOSED_MODE : 2; - unsigned int COVERAGE_TO_SHADER_SELECT : 2; - unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int SAMPLE_COVERAGE_ENCODING : 1; - unsigned int COVERED_CENTROID_IS_CENTER : 1; - unsigned int : 2; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_MASK_X0Y0_X1Y0 { - struct { - unsigned int AA_MASK_X0Y0 : 16; - unsigned int AA_MASK_X1Y0 : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_MASK_X0Y1_X1Y1 { - struct { - unsigned int AA_MASK_X0Y1 : 16; - unsigned int AA_MASK_X1Y1 : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 { - struct { - unsigned int S0_X : 4; - unsigned int S0_Y : 4; - unsigned int S1_X : 4; - 
unsigned int S1_Y : 4; - unsigned int S2_X : 4; - unsigned int S2_Y : 4; - unsigned int S3_X : 4; - unsigned int S3_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 { - struct { - unsigned int S4_X : 4; - unsigned int S4_Y : 4; - unsigned int S5_X : 4; - unsigned int S5_Y : 4; - unsigned int S6_X : 4; - unsigned int S6_Y : 4; - unsigned int S7_X : 4; - unsigned int S7_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 { - struct { - unsigned int S8_X : 4; - unsigned int S8_Y : 4; - unsigned int S9_X : 4; - unsigned int S9_Y : 4; - unsigned int S10_X : 4; - unsigned int S10_Y : 4; - unsigned int S11_X : 4; - unsigned int S11_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 { - struct { - unsigned int S12_X : 4; - unsigned int S12_Y : 4; - unsigned int S13_X : 4; - unsigned int S13_Y : 4; - unsigned int S14_X : 4; - unsigned int S14_Y : 4; - unsigned int S15_X : 4; - unsigned int S15_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 { - struct { - unsigned int S0_X : 4; - unsigned int S0_Y : 4; - unsigned int S1_X : 4; - unsigned int S1_Y : 4; - unsigned int S2_X : 4; - unsigned int S2_Y : 4; - unsigned int S3_X : 4; - unsigned int S3_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 { - struct { - unsigned int S4_X : 4; - unsigned int S4_Y : 4; - unsigned int S5_X : 4; - unsigned int S5_Y : 4; - unsigned int S6_X : 4; - unsigned int S6_Y : 4; - unsigned int S7_X : 4; - unsigned int S7_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 { - struct { - 
unsigned int S8_X : 4; - unsigned int S8_Y : 4; - unsigned int S9_X : 4; - unsigned int S9_Y : 4; - unsigned int S10_X : 4; - unsigned int S10_Y : 4; - unsigned int S11_X : 4; - unsigned int S11_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 { - struct { - unsigned int S12_X : 4; - unsigned int S12_Y : 4; - unsigned int S13_X : 4; - unsigned int S13_Y : 4; - unsigned int S14_X : 4; - unsigned int S14_Y : 4; - unsigned int S15_X : 4; - unsigned int S15_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 { - struct { - unsigned int S0_X : 4; - unsigned int S0_Y : 4; - unsigned int S1_X : 4; - unsigned int S1_Y : 4; - unsigned int S2_X : 4; - unsigned int S2_Y : 4; - unsigned int S3_X : 4; - unsigned int S3_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 { - struct { - unsigned int S4_X : 4; - unsigned int S4_Y : 4; - unsigned int S5_X : 4; - unsigned int S5_Y : 4; - unsigned int S6_X : 4; - unsigned int S6_Y : 4; - unsigned int S7_X : 4; - unsigned int S7_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 { - struct { - unsigned int S8_X : 4; - unsigned int S8_Y : 4; - unsigned int S9_X : 4; - unsigned int S9_Y : 4; - unsigned int S10_X : 4; - unsigned int S10_Y : 4; - unsigned int S11_X : 4; - unsigned int S11_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 { - struct { - unsigned int S12_X : 4; - unsigned int S12_Y : 4; - unsigned int S13_X : 4; - unsigned int S13_Y : 4; - unsigned int S14_X : 4; - unsigned int S14_Y : 4; - unsigned int S15_X : 4; - unsigned int S15_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 { - struct { - unsigned int S0_X : 4; - unsigned int S0_Y : 4; - unsigned int S1_X : 4; - unsigned int S1_Y : 4; - unsigned int S2_X : 4; - unsigned int S2_Y : 4; - unsigned int S3_X : 4; - unsigned int S3_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 { - struct { - unsigned int S4_X : 4; - unsigned int S4_Y : 4; - unsigned int S5_X : 4; - unsigned int S5_Y : 4; - unsigned int S6_X : 4; - unsigned int S6_Y : 4; - unsigned int S7_X : 4; - unsigned int S7_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 { - struct { - unsigned int S8_X : 4; - unsigned int S8_Y : 4; - unsigned int S9_X : 4; - unsigned int S9_Y : 4; - unsigned int S10_X : 4; - unsigned int S10_Y : 4; - unsigned int S11_X : 4; - unsigned int S11_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 { - struct { - unsigned int S12_X : 4; - unsigned int S12_Y : 4; - unsigned int S13_X : 4; - unsigned int S13_Y : 4; - unsigned int S14_X : 4; - unsigned int S14_Y : 4; - unsigned int S15_X : 4; - unsigned int S15_Y : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_ATM_CNTL { - struct { - unsigned int SC_PC_IF_SIZE : 6; - unsigned int : 1; - unsigned int DISABLE_SC_PC_IF_FGCG_EN : 1; - unsigned int MAX_ATTRIBUTES_IN_WAVE : 8; - unsigned int DISABLE_MAX_ATTRIBUTES : 1; - unsigned int SELECT_MAX_ATTRIBUTES : 1; - unsigned int : 14; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PA_SC_BC_WAVE_BREAK { - struct { - unsigned int MAX_DEALLOCS_IN_WAVE : 11; - unsigned int : 5; - unsigned int 
MAX_FPOVS_IN_WAVE : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_CNTL_0 { - struct { - unsigned int BINNING_MODE : 2; - unsigned int BIN_SIZE_X : 1; - unsigned int BIN_SIZE_Y : 1; - unsigned int BIN_SIZE_X_EXTEND : 3; - unsigned int BIN_SIZE_Y_EXTEND : 3; - unsigned int CONTEXT_STATES_PER_BIN : 3; - unsigned int PERSISTENT_STATES_PER_BIN : 5; - unsigned int DISABLE_START_OF_PRIM : 1; - unsigned int FPOVS_PER_BATCH : 8; - unsigned int OPTIMAL_BIN_SELECTION : 1; - unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int FLUSH_ON_BINNING_TRANSITION : 1; - unsigned int : 3; - } gfx09_1xPlus; - struct { - unsigned int : 29; - unsigned int BIN_MAPPING_MODE : 2; - unsigned int : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_CNTL_1 { - struct { - unsigned int MAX_ALLOC_COUNT : 16; - unsigned int MAX_PRIM_PER_BATCH : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_BINNER_CNTL_2 { - struct { - unsigned int BIN_SIZE_X_MULT_BY_1P5X : 1; - unsigned int BIN_SIZE_Y_MULT_BY_1P5X : 1; - unsigned int ENABLE_LIGHT_VOLUME_RENDERING_OPTIMIZATION : 1; - unsigned int DUAL_LIGHT_SHAFT_IN_DRAW : 1; - unsigned int : 3; - unsigned int CONTEXT_DONE_EVENTS_PER_BIN : 4; - unsigned int ZPP_ENABLED : 1; - unsigned int ZPP_OPTIMIZATION_ENABLED : 1; - unsigned int ZPP_AREA_THRESHOLD : 8; - unsigned int DISABLE_NOPCEXPORT_BREAKBATCH_CONDITION : 1; - unsigned int : 10; - } bits, bitfields; -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 4; - unsigned int LIGHT_SHAFT_DRAW_CALL_LIMIT : 3; - unsigned int : 25; - } nv3x; -#endif -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 4; - unsigned int LIGHT_SHAFT_DRAW_CALL_LIMIT : 3; - unsigned int : 25; - } phx1; 
-#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PA_SC_BINNER_CNTL_OVERRIDE { - struct { - unsigned int BINNING_MODE : 2; - unsigned int : 8; - unsigned int CONTEXT_STATES_PER_BIN : 3; - unsigned int PERSISTENT_STATES_PER_BIN : 5; - unsigned int : 1; - unsigned int FPOVS_PER_BATCH : 8; - unsigned int DIRECT_OVERRIDE_MODE : 1; - unsigned int OVERRIDE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_EVENT_CNTL_0 { - struct { - unsigned int RESERVED_0 : 2; - unsigned int SAMPLE_STREAMOUTSTATS1 : 2; - unsigned int SAMPLE_STREAMOUTSTATS2 : 2; - unsigned int SAMPLE_STREAMOUTSTATS3 : 2; - unsigned int CACHE_FLUSH_TS : 2; - unsigned int CONTEXT_DONE : 2; - unsigned int CACHE_FLUSH : 2; - unsigned int CS_PARTIAL_FLUSH : 2; - unsigned int VGT_STREAMOUT_SYNC : 2; - unsigned int RESERVED_9 : 2; - unsigned int VGT_STREAMOUT_RESET : 2; - unsigned int END_OF_PIPE_INCR_DE : 2; - unsigned int END_OF_PIPE_IB_END : 2; - unsigned int RST_PIX_CNT : 2; - unsigned int BREAK_BATCH : 2; - unsigned int VS_PARTIAL_FLUSH : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_EVENT_CNTL_1 { - struct { - unsigned int PS_PARTIAL_FLUSH : 2; - unsigned int FLUSH_HS_OUTPUT : 2; - unsigned int FLUSH_DFSM : 2; - unsigned int RESET_TO_LOWEST_VGT : 2; - unsigned int CACHE_FLUSH_AND_INV_TS_EVENT : 2; - unsigned int : 2; - unsigned int CACHE_FLUSH_AND_INV_EVENT : 2; - unsigned int PERFCOUNTER_START : 2; - unsigned int PERFCOUNTER_STOP : 2; - unsigned int PIPELINESTAT_START : 2; - unsigned int PIPELINESTAT_STOP : 2; - unsigned int PERFCOUNTER_SAMPLE : 2; - unsigned int FLUSH_ES_OUTPUT : 2; - unsigned int : 2; - unsigned int SAMPLE_PIPELINESTAT : 2; - unsigned int SO_VGTSTREAMOUT_FLUSH : 2; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int FLUSH_GS_OUTPUT : 2; - unsigned int : 4; - } gfx09; - struct { - unsigned int : 10; - 
unsigned int ZPASS_DONE : 2; - unsigned int : 20; - } gfx09_10; - struct { - unsigned int : 26; - unsigned int BIN_CONF_OVERRIDE_CHECK : 2; - unsigned int : 4; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 10; - unsigned int WAIT_SYNC : 2; - unsigned int : 20; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_EVENT_CNTL_2 { - struct { - unsigned int SAMPLE_STREAMOUTSTATS : 2; - unsigned int RESET_VTX_CNT : 2; - unsigned int BLOCK_CONTEXT_DONE : 2; - unsigned int : 2; - unsigned int VGT_FLUSH : 2; - unsigned int TGID_ROLLOVER : 2; - unsigned int SQ_NON_EVENT : 2; - unsigned int SC_SEND_DB_VPZ : 2; - unsigned int BOTTOM_OF_PIPE_TS : 2; - unsigned int : 2; - unsigned int DB_CACHE_FLUSH_AND_INV : 2; - unsigned int FLUSH_AND_INV_DB_DATA_TS : 2; - unsigned int FLUSH_AND_INV_DB_META : 2; - unsigned int FLUSH_AND_INV_CB_DATA_TS : 2; - unsigned int FLUSH_AND_INV_CB_META : 2; - unsigned int CS_DONE : 2; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int CS_CONTEXT_DONE : 2; - unsigned int : 10; - unsigned int FLUSH_SX_TS : 2; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 6; - unsigned int RESERVED_35 : 2; - unsigned int : 10; - unsigned int RESERVED_41 : 2; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_EVENT_CNTL_3 { - struct { - unsigned int PS_DONE : 2; - unsigned int FLUSH_AND_INV_CB_PIXEL_DATA : 2; - unsigned int : 2; - unsigned int THREAD_TRACE_START : 2; - unsigned int THREAD_TRACE_STOP : 2; - unsigned int THREAD_TRACE_MARKER : 2; - unsigned int : 2; - unsigned int THREAD_TRACE_FINISH : 2; - unsigned int PIXEL_PIPE_STAT_CONTROL : 2; - unsigned int PIXEL_PIPE_STAT_DUMP : 2; - unsigned int PIXEL_PIPE_STAT_RESET : 2; - unsigned int CONTEXT_SUSPEND : 2; - unsigned int OFFCHIP_HS_DEALLOC : 2; - unsigned int ENABLE_NGG_PIPELINE : 2; - 
unsigned int : 4; - } bits, bitfields; - struct { - unsigned int : 4; - unsigned int SX_CB_RAT_ACK_REQUEST : 2; - unsigned int : 6; - unsigned int THREAD_TRACE_FLUSH : 2; - unsigned int : 16; - unsigned int RESERVED_63 : 2; - } gfx09; - struct { - unsigned int : 28; - unsigned int ENABLE_LEGACY_PIPELINE : 2; - unsigned int : 2; - } gfx09_10; - struct { - unsigned int : 4; - unsigned int RESERVED_50 : 2; - unsigned int : 6; - unsigned int THREAD_TRACE_DRAW : 2; - unsigned int : 16; - unsigned int DRAW_DONE : 2; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 28; - unsigned int ENABLE_PIPELINE_NOT_USED : 2; - unsigned int : 2; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_PERF_CNTL_0 { - struct { - unsigned int BIN_HIST_NUM_PRIMS_THRESHOLD : 10; - unsigned int BATCH_HIST_NUM_PRIMS_THRESHOLD : 10; - unsigned int BIN_HIST_NUM_CONTEXT_THRESHOLD : 3; - unsigned int BATCH_HIST_NUM_CONTEXT_THRESHOLD : 3; - unsigned int : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_PERF_CNTL_1 { - struct { - unsigned int BIN_HIST_NUM_PERSISTENT_STATE_THRESHOLD : 5; - unsigned int BATCH_HIST_NUM_PERSISTENT_STATE_THRESHOLD : 5; - unsigned int BATCH_HIST_NUM_TRIV_REJECTED_PRIMS_THRESHOLD : 16; - unsigned int : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_PERF_CNTL_2 { - struct { - unsigned int BATCH_HIST_NUM_ROWS_PER_PRIM_THRESHOLD : 11; - unsigned int BATCH_HIST_NUM_COLUMNS_PER_ROW_THRESHOLD : 11; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_BINNER_PERF_CNTL_3 { - struct { - unsigned int BATCH_HIST_NUM_PS_WAVE_BREAKS_THRESHOLD : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
PA_SC_BINNER_TIMEOUT_COUNTER { - struct { - unsigned int THRESHOLD : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CENTROID_PRIORITY_0 { - struct { - unsigned int DISTANCE_0 : 4; - unsigned int DISTANCE_1 : 4; - unsigned int DISTANCE_2 : 4; - unsigned int DISTANCE_3 : 4; - unsigned int DISTANCE_4 : 4; - unsigned int DISTANCE_5 : 4; - unsigned int DISTANCE_6 : 4; - unsigned int DISTANCE_7 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CENTROID_PRIORITY_1 { - struct { - unsigned int DISTANCE_8 : 4; - unsigned int DISTANCE_9 : 4; - unsigned int DISTANCE_10 : 4; - unsigned int DISTANCE_11 : 4; - unsigned int DISTANCE_12 : 4; - unsigned int DISTANCE_13 : 4; - unsigned int DISTANCE_14 : 4; - unsigned int DISTANCE_15 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_0_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_0_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_1_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_1_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_2_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_2_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_3_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_3_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CLIPRECT_RULE { - struct { - unsigned int CLIP_RULE : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_CONSERVATIVE_RASTERIZATION_CNTL { - struct { - unsigned int OVER_RAST_ENABLE : 1; - unsigned int OVER_RAST_SAMPLE_SELECT : 4; - unsigned int UNDER_RAST_ENABLE : 1; - unsigned int UNDER_RAST_SAMPLE_SELECT : 4; - unsigned int PBB_UNCERTAINTY_REGION_ENABLE : 1; - unsigned int ZMM_TRI_EXTENT : 1; - unsigned int ZMM_TRI_OFFSET : 1; - unsigned int OVERRIDE_OVER_RAST_INNER_TO_NORMAL : 1; - unsigned int OVERRIDE_UNDER_RAST_INNER_TO_NORMAL : 1; - unsigned int DEGENERATE_OVERRIDE_INNER_TO_NORMAL_DISABLE : 1; - unsigned int UNCERTAINTY_REGION_MODE : 2; - unsigned int OUTER_UNCERTAINTY_EDGERULE_OVERRIDE : 1; - unsigned int INNER_UNCERTAINTY_EDGERULE_OVERRIDE : 1; - unsigned int NULL_SQUAD_AA_MASK_ENABLE : 1; - unsigned int COVERAGE_AA_MASK_ENABLE : 1; - unsigned int PREZ_AA_MASK_ENABLE : 1; - unsigned int POSTZ_AA_MASK_ENABLE : 1; - unsigned int CENTROID_SAMPLE_OVERRIDE : 1; - unsigned int : 7; - } bits, bitfields; - struct { - unsigned int : 25; - unsigned int UNCERTAINTY_REGION_MULT : 2; - unsigned int UNCERTAINTY_REGION_PBB_MULT : 2; - unsigned int : 3; - } gfx10Plus; - - unsigned int u32All; 
- signed int i32All; - float f32All; -}; - -union PA_SC_DSM_CNTL { - struct { - unsigned int FORCE_EOV_REZ_0 : 1; - unsigned int FORCE_EOV_REZ_1 : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_EDGERULE { - struct { - unsigned int ER_TRI : 4; - unsigned int ER_POINT : 4; - unsigned int ER_RECT : 4; - unsigned int ER_LINE_LR : 6; - unsigned int ER_LINE_RL : 6; - unsigned int ER_LINE_TB : 4; - unsigned int ER_LINE_BT : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_ENHANCE { - struct { - unsigned int ENABLE_PA_SC_OUT_OF_ORDER : 1; - unsigned int DISABLE_SC_DB_TILE_FIX : 1; - unsigned int DISABLE_AA_MASK_FULL_FIX : 1; - unsigned int ENABLE_1XMSAA_SAMPLE_LOCATIONS : 1; - unsigned int ENABLE_1XMSAA_SAMPLE_LOC_CENTROID : 1; - unsigned int DISABLE_SCISSOR_FIX : 1; - unsigned int SEND_UNLIT_STILES_TO_PACKER : 1; - unsigned int DISABLE_DUALGRAD_PERF_OPTIMIZATION : 1; - unsigned int DISABLE_SC_PROCESS_RESET_PRIM : 1; - unsigned int DISABLE_SC_PROCESS_RESET_SUPERTILE : 1; - unsigned int DISABLE_SC_PROCESS_RESET_TILE : 1; - unsigned int DISABLE_PA_SC_GUIDANCE : 1; - unsigned int DISABLE_EOV_ALL_CTRL_ONLY_COMBINATIONS : 1; - unsigned int ENABLE_MULTICYCLE_BUBBLE_FREEZE : 1; - unsigned int DISABLE_OUT_OF_ORDER_PA_SC_GUIDANCE : 1; - unsigned int ENABLE_OUT_OF_ORDER_POLY_MODE : 1; - unsigned int DISABLE_OUT_OF_ORDER_EOP_SYNC_NULL_PRIMS_LAST : 1; - unsigned int DISABLE_OUT_OF_ORDER_THRESHOLD_SWITCHING : 1; - unsigned int ENABLE_OUT_OF_ORDER_THRESHOLD_SWITCH_AT_EOPG_ONLY : 1; - unsigned int DISABLE_OUT_OF_ORDER_DESIRED_FIFO_EMPTY_SWITCHING : 1; - unsigned int DISABLE_OUT_OF_ORDER_SELECTED_FIFO_EMPTY_SWITCHING : 1; - unsigned int DISABLE_OUT_OF_ORDER_EMPTY_SWITCHING_HYSTERYSIS : 1; - unsigned int ENABLE_OUT_OF_ORDER_DESIRED_FIFO_IS_NEXT_FEID : 1; - unsigned int DISABLE_OOO_NO_EOPG_SKEW_DESIRED_FIFO_IS_CURRENT_FIFO : 1; - unsigned int 
OOO_DISABLE_EOP_ON_FIRST_LIVE_PRIM_HIT : 1; - unsigned int OOO_DISABLE_EOPG_SKEW_THRESHOLD_SWITCHING : 1; - unsigned int DISABLE_EOP_LINE_STIPPLE_RESET : 1; - unsigned int DISABLE_VPZ_EOP_LINE_STIPPLE_RESET : 1; - unsigned int IOO_DISABLE_SCAN_UNSELECTED_FIFOS_FOR_DUAL_GFX_RING_CHANGE : 1; - unsigned int OOO_USE_ABSOLUTE_FIFO_COUNT_IN_THRESHOLD_SWITCHING : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_ENHANCE_1 { - struct { - unsigned int REALIGN_DQUADS_OVERRIDE_ENABLE : 1; - unsigned int REALIGN_DQUADS_OVERRIDE : 2; - unsigned int DISABLE_SC_BINNING : 1; - unsigned int BYPASS_PBB : 1; - unsigned int : 1; - unsigned int ECO_SPARE1 : 1; - unsigned int ECO_SPARE2 : 1; - unsigned int ECO_SPARE3 : 1; - unsigned int DISABLE_SC_PROCESS_RESET_PBB : 1; - unsigned int DISABLE_PBB_SCISSOR_OPT : 1; - unsigned int : 3; - unsigned int DISABLE_SC_DB_TILE_INTF_FINE_CLOCK_GATE : 1; - unsigned int : 1; - unsigned int DISABLE_PACKER_ODC_ENHANCE : 1; - unsigned int : 1; - unsigned int OPTIMAL_BIN_SELECTION : 1; - unsigned int : 1; - unsigned int DISABLE_PBB_CLK_OPTIMIZATION : 1; - unsigned int DISABLE_PBB_SCISSOR_CLK_OPTIMIZATION : 1; - unsigned int DISABLE_PBB_BINNING_CLK_OPTIMIZATION : 1; - unsigned int : 9; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int DEBUG_PIXEL_PICKER_XY_UNPACK : 1; - unsigned int : 4; - unsigned int ALLOW_SCALE_LINE_WIDTH_PAD_WITH_BINNING : 1; - unsigned int : 14; - } most; - struct { - unsigned int : 11; - unsigned int ENABLE_DFSM_FLUSH_EVENT_TO_FLUSH_POPS_CAM : 1; - unsigned int : 7; - unsigned int DISABLE_FORCE_SOP_ALL_EVENTS : 1; - unsigned int : 12; - } core; - struct { - unsigned int : 13; - unsigned int DISABLE_PACKER_GRAD_FDCE_ENHANCE : 1; - unsigned int : 18; - } gfx09; - struct { - unsigned int : 23; - unsigned int RSVD : 9; - } gfx09_0; - struct { - unsigned int : 5; - unsigned int ECO_SPARE0 : 1; - unsigned int : 9; - unsigned int 
DISABLE_SC_PIPELINE_RESET_LEGACY_MODE_TRANSITION : 1; - unsigned int : 16; - } gfx09_10; - struct { - unsigned int : 31; - unsigned int RSVD : 1; - } gfx09_1x; - struct { - unsigned int : 23; - unsigned int DISABLE_INTF_CG : 1; - unsigned int IOO_DISABLE_EOP_ON_FIRST_LIVE_PRIM_HIT : 1; - unsigned int DISABLE_SHADER_PROFILING_FOR_POWER : 1; - unsigned int FLUSH_ON_BINNING_TRANSITION : 1; - unsigned int DISABLE_QUAD_PROC_FDCE_ENHANCE : 1; - unsigned int DISABLE_SC_PS_PA_ARBITER_FIX : 1; - unsigned int DISABLE_SC_PS_PA_ARBITER_FIX_1 : 1; - unsigned int PASS_VPZ_EVENT_TO_SPI : 1; - unsigned int : 1; - } gfx09_1xPlus; - struct { - unsigned int : 13; - unsigned int DEBUG_PIXEL_PICKER_COUNT_PIXELS : 1; - unsigned int : 18; - } gfx10CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 5; - unsigned int DISABLE_NONBINNED_LIVE_PRIM_DG1_LS0_CL0_EOPKT_POKE : 1; - unsigned int : 26; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_ENHANCE_2 { - struct { - unsigned int : 11; - unsigned int PBB_TIMEOUT_THRESHOLD_MODE : 1; - unsigned int : 20; - } most; - struct { - unsigned int : 24; - unsigned int DISABLE_PBB_EOP_INSERTION_FOR_MIXED_BINNING_AND_IMMEDIATE : 1; - unsigned int DISABLE_DFSM_FLUSH : 1; - unsigned int : 6; - } gfx10; - struct { - unsigned int ECO_SPARE0 : 1; - unsigned int ECO_SPARE1 : 1; - unsigned int ECO_SPARE2 : 1; - unsigned int ECO_SPARE3 : 1; - unsigned int : 26; - unsigned int RSVD : 2; - } gfx101; - struct { - unsigned int DISABLE_SC_MEM_MACRO_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_SC_DB_QUAD_INTF_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_SC_BCI_QUAD_INTF_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_SC_BCI_PRIM_INTF_FINE_CLOCK_GATE : 1; - unsigned int : 26; - unsigned int DISABLE_MAX_DEALLOC_FORCE_EOV_RESET_N_WAVES_COUNT : 1; - unsigned int RSVD : 1; - } gfx103PlusExclusive; - struct { - unsigned int : 20; - unsigned int 
ENABLE_BLOCKING_WRITES_OF_GEN2_REG : 1; - unsigned int : 11; - } gfx10Core; - struct { - unsigned int : 4; - unsigned int ENABLE_LPOV_WAVE_BREAK : 1; - unsigned int ENABLE_FPOV_WAVE_BREAK : 1; - unsigned int : 1; - unsigned int ENABLE_SC_SEND_DB_VPZ_FOR_EN_PRIM_PAYLOAD : 1; - unsigned int DISABLE_BREAK_BATCH_ON_GFX_PIPE_SWITCH : 1; - unsigned int DISABLE_FULL_TILE_WAVE_BREAK : 1; - unsigned int ENABLE_VPZ_INJECTION_BEFORE_NULL_PRIMS : 1; - unsigned int : 1; - unsigned int DISABLE_PACKER_GRAD_FDCE_ENHANCE : 1; - unsigned int DISABLE_SC_SPI_INTF_EARLY_WAKEUP : 1; - unsigned int DISABLE_SC_BCI_INTF_EARLY_WAKEUP : 1; - unsigned int DISABLE_EXPOSED_GT_DETAIL_RATE_TILE_COV_ADJ : 1; - unsigned int PBB_WARP_CLK_MAIN_CLK_WAKEUP : 1; - unsigned int PBB_MAIN_CLK_REG_BUSY_WAKEUP : 1; - unsigned int DISABLE_BREAK_BATCH_ON_GFX_PIPELINE_RESET : 1; - unsigned int : 2; - unsigned int DISABLE_SC_DBR_DATAPATH_FGCG : 1; - unsigned int : 1; - unsigned int PROCESS_RESET_FORCE_STILE_MASK_TO_ZERO : 1; - unsigned int : 2; - unsigned int BREAK_WHEN_ONE_NULL_PRIM_BATCH : 1; - unsigned int NULL_PRIM_BREAK_BATCH_LIMIT : 3; - unsigned int : 2; - } gfx10Plus; - struct { - unsigned int : 12; - unsigned int RSVD : 20; - } rn; - struct { - unsigned int : 8; - unsigned int RSVD : 24; - } vg12_Vg20; - struct { - unsigned int RESERVED_0 : 1; - unsigned int RESERVED_1 : 1; - unsigned int RESERVED_2 : 1; - unsigned int RESERVED_3 : 1; - unsigned int RESERVED_4 : 1; - unsigned int RESERVED_5 : 1; - unsigned int : 26; - } vg12_Vg20_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_ENHANCE_3 { - struct { - unsigned int FORCE_USE_OF_SC_CENTROID_DATA : 1; - unsigned int : 31; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int DISABLE_RB_MASK_COPY_FOR_NONP2_SA_HARVEST : 1; - unsigned int : 29; - } gfx103Derivative; - struct { - unsigned int : 3; - unsigned int FORCE_PBB_WORKLOAD_MODE_TO_ZERO : 1; - unsigned int 
DISABLE_PKR_BCI_QUAD_NEW_PRIM_DATA_LOAD_OPTIMIZATION : 1; - unsigned int DISABLE_CP_CONTEXT_DONE_PERFCOUNT_SAMPLE_EN : 1; - unsigned int : 26; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 6; - unsigned int ENABLE_SINGLE_PA_EOPKT_FIRST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER_FOR_PBB_BINNED_PRIMS : 1; - unsigned int DISABLE_SET_VPZ_DIRTY_EOPKT_LAST_PHASE_ONLY : 1; - unsigned int DISABLE_PBB_EOP_OPTIMIZATION_WITH_SAME_CONTEXT_BATCHES : 1; - unsigned int DISABLE_FAST_NULL_PRIM_OPTIMIZATION : 1; - unsigned int USE_PBB_PRIM_STORAGE_WHEN_STALLED : 1; - unsigned int DISABLE_LIGHT_VOLUME_RENDERING_OPTIMIZATION : 1; - unsigned int : 18; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 2; - unsigned int DISABLE_RB_MASK_COPY_FOR_NONP2_SA_PAIR_HARVEST : 1; - unsigned int : 11; - unsigned int DISABLE_ZPRE_PASS_OPTIMIZATION : 1; - unsigned int DISABLE_EVENT_INCLUSION_IN_CONTEXT_STATES_PER_BIN : 1; - unsigned int DISABLE_PIXEL_WAIT_SYNC_COUNTERS : 1; - unsigned int DISABLE_SC_CPG_PSINVOC_SEDC_ISOLATION_ACCUM : 1; - unsigned int DISABLE_SC_QP_VRS_RATE_FB_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_SC_QP_VRS_RATE_CACHE_RD_FINE_CLOCK_GATE : 1; - unsigned int DISABLE_PKR_FORCE_EOV_MAX_REZ_CNT_FOR_SPI_BACKPRESSURE_ONLY : 1; - unsigned int DISABLE_PKR_FORCE_EOV_MAX_CLK_CNT_FOR_SPI_BACKPRESSURE_ONLY : 1; - unsigned int DO_NOT_INCLUDE_OREO_WAVEID_IN_FORCE_EOV_MAX_CNT_DISABLE : 1; - unsigned int DISABLE_PWS_PRE_DEPTH_WAIT_SYNC_VPZ_INSERTION : 1; - unsigned int PKR_CNT_FORCE_EOV_AT_QS_EMPTY_ONLY : 1; - unsigned int PKR_S0_FORCE_EOV_STALL : 1; - unsigned int PKR_S1_FORCE_EOV_STALL : 1; - unsigned int PKR_S2_FORCE_EOV_STALL : 1; - unsigned int ECO_SPARE0 : 1; - unsigned int ECO_SPARE1 : 1; - unsigned int ECO_SPARE2 : 1; - unsigned int 
ECO_SPARE3 : 1; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 6; - unsigned int RSVD : 26; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 6; - unsigned int RSVD : 26; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 6; - unsigned int ENABLE_SINGLE_PA_EOPKT_FIRST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER_FOR_PBB_BINNED_PRIMS : 1; - unsigned int DISABLE_SET_VPZ_DIRTY_EOPKT_LAST_PHASE_ONLY : 1; - unsigned int RSVD : 22; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 6; - unsigned int ENABLE_SINGLE_PA_EOPKT_FIRST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER_FOR_PBB_BINNED_PRIMS : 1; - unsigned int DISABLE_SET_VPZ_DIRTY_EOPKT_LAST_PHASE_ONLY : 1; - unsigned int RSVD : 22; - } nv24; -#endif - struct { - unsigned int : 6; - unsigned int ENABLE_SINGLE_PA_EOPKT_FIRST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER_FOR_PBB_BINNED_PRIMS : 1; - unsigned int DISABLE_SET_VPZ_DIRTY_EOPKT_LAST_PHASE_ONLY : 1; - unsigned int RSVD : 22; - } raphael; - struct { - unsigned int : 6; - unsigned int ENABLE_SINGLE_PA_EOPKT_FIRST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER : 1; - unsigned int ENABLE_SINGLE_PA_EOPKT_LAST_PHASE_FILTER_FOR_PBB_BINNED_PRIMS : 1; - unsigned int DISABLE_SET_VPZ_DIRTY_EOPKT_LAST_PHASE_ONLY : 1; - unsigned int RSVD : 22; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_ENHANCE_INTERNAL { - struct { - unsigned int RESERVED : 32; - } gfx103; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_FIFO_DEPTH_CNTL { - struct { - unsigned int DEPTH : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union PA_SC_FIFO_SIZE { - struct { - unsigned int SC_FRONTEND_PRIM_FIFO_SIZE : 6; - unsigned int SC_BACKEND_PRIM_FIFO_SIZE : 9; - unsigned int SC_HIZ_TILE_FIFO_SIZE : 6; - unsigned int SC_EARLYZ_TILE_FIFO_SIZE : 11; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_FORCE_EOV_MAX_CNTS { - struct { - unsigned int FORCE_EOV_MAX_CLK_CNT : 16; - unsigned int FORCE_EOV_MAX_REZ_CNT : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_GENERIC_SCISSOR_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_GENERIC_SCISSOR_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_COUNT { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_H { - struct { - unsigned int X_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_HV_EN { - struct { - unsigned int ENABLE_HV_PRE_SHADER : 1; - unsigned int FORCE_PRE_SHADER_ALL_PIXELS : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_HV_LOCK { - struct { - unsigned int DISABLE_NON_PRIV_WRITES : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_OCCURRENCE { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_HP3D_TRAP_SCREEN_V { - struct { - unsigned int Y_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_IF_FIFO_SIZE { - struct { - unsigned int SC_DB_TILE_IF_FIFO_SIZE : 6; - unsigned int SC_DB_QUAD_IF_FIFO_SIZE : 6; - unsigned int SC_SPI_IF_FIFO_SIZE : 6; - unsigned int SC_BCI_IF_FIFO_SIZE : 6; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_LINE_CNTL { - struct { - unsigned int : 9; - unsigned int EXPAND_LINE_WIDTH : 1; - unsigned int LAST_PIXEL : 1; - unsigned int PERPENDICULAR_ENDCAP_ENA : 1; - unsigned int DX10_DIAMOND_TEST_ENA : 1; - unsigned int : 19; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int EXTRA_DX_DY_PRECISION : 1; - unsigned int : 18; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_LINE_STIPPLE { - struct { - unsigned int LINE_PATTERN : 16; - unsigned int REPEAT_COUNT : 8; - unsigned int : 4; - unsigned int PATTERN_BIT_ORDER : 1; - unsigned int AUTO_RESET_CNTL : 2; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_LINE_STIPPLE_STATE { - struct { - unsigned int CURRENT_PTR : 4; - unsigned int : 4; - unsigned int CURRENT_COUNT : 8; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_MODE_CNTL_0 { - struct { - unsigned int MSAA_ENABLE : 1; - unsigned int VPORT_SCISSOR_ENABLE : 1; - unsigned int LINE_STIPPLE_ENABLE : 1; - unsigned int SEND_UNLIT_STILES_TO_PKR : 1; - unsigned int : 2; - unsigned int COARSE_TILE_STARTS_ON_EVEN_RB : 1; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 4; - unsigned int SCALE_LINE_WIDTH_PAD : 1; - unsigned int : 27; - } most; - struct { - unsigned int : 5; - unsigned int 
ALTERNATE_RBS_PER_TILE : 1; - unsigned int : 26; - } core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_MODE_CNTL_1 { - struct { - unsigned int WALK_SIZE : 1; - unsigned int WALK_ALIGNMENT : 1; - unsigned int WALK_ALIGN8_PRIM_FITS_ST : 1; - unsigned int WALK_FENCE_ENABLE : 1; - unsigned int WALK_FENCE_SIZE : 3; - unsigned int SUPERTILE_WALK_ORDER_ENABLE : 1; - unsigned int TILE_WALK_ORDER_ENABLE : 1; - unsigned int TILE_COVER_DISABLE : 1; - unsigned int TILE_COVER_NO_SCISSOR : 1; - unsigned int ZMM_LINE_EXTENT : 1; - unsigned int ZMM_LINE_OFFSET : 1; - unsigned int ZMM_RECT_EXTENT : 1; - unsigned int KILL_PIX_POST_HI_Z : 1; - unsigned int KILL_PIX_POST_DETAIL_MASK : 1; - unsigned int PS_ITER_SAMPLE : 1; - unsigned int MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE : 1; - unsigned int MULTI_GPU_SUPERTILE_ENABLE : 1; - unsigned int GPU_ID_OVERRIDE_ENABLE : 1; - unsigned int GPU_ID_OVERRIDE : 4; - unsigned int MULTI_GPU_PRIM_DISCARD_ENABLE : 1; - unsigned int FORCE_EOV_CNTDWN_ENABLE : 1; - unsigned int FORCE_EOV_REZ_ENABLE : 1; - unsigned int OUT_OF_ORDER_PRIMITIVE_ENABLE : 1; - unsigned int OUT_OF_ORDER_WATER_MARK : 3; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_NGG_MODE_CNTL { - struct { - unsigned int MAX_DEALLOCS_IN_WAVE : 11; - unsigned int : 21; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int MAX_FPOVS_IN_WAVE : 8; - unsigned int : 8; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int DISABLE_FPOG_AND_DEALLOC_CONFLICT : 1; - unsigned int DISABLE_MAX_DEALLOC : 1; - unsigned int DISABLE_MAX_ATTRIBUTES : 1; - unsigned int : 9; - unsigned int MAX_ATTRIBUTES_IN_WAVE : 8; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_COUNT { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_H { - struct { - unsigned int X_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_HV_EN { - struct { - unsigned int ENABLE_HV_PRE_SHADER : 1; - unsigned int FORCE_PRE_SHADER_ALL_PIXELS : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_HV_LOCK { - struct { - unsigned int DISABLE_NON_PRIV_WRITES : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_OCCURRENCE { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_P3D_TRAP_SCREEN_V { - struct { - unsigned int Y_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_PACKER_WAVE_ID_CNTL { - struct { - unsigned int WAVE_TABLE_SIZE : 10; - unsigned int SC_DB_WAVE_IF_FIFO_SIZE : 6; - unsigned int DISABLE_SC_DB_WAVE_IF_FGCG_EN : 1; - unsigned int SC_SPI_WAVE_IF_FIFO_SIZE : 6; - unsigned int DISABLE_SC_SPI_WAVE_IF_FGCG_EN : 1; - unsigned int DEBUG_CONFLICT_QUAD : 4; - unsigned int : 3; - unsigned int DISABLE_OREO_CONFLICT_QUAD : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PA_SC_PBB_OVERRIDE_FLAG { - struct { - unsigned int OVERRIDE : 1; - unsigned int PIPE_ID : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 
32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER6_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER6_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER6_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union PA_SC_PERFCOUNTER7_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER7_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PERFCOUNTER7_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_PKR_WAVE_TABLE_CNTL { - struct { - unsigned int SIZE : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_RASTER_CONFIG { - struct { - unsigned int RB_MAP_PKR0 : 2; - unsigned int RB_MAP_PKR1 : 2; - unsigned int RB_XSEL2 : 2; - unsigned int RB_XSEL : 1; - unsigned int RB_YSEL : 1; - unsigned int PKR_MAP : 2; - unsigned int PKR_XSEL : 2; - unsigned int PKR_YSEL : 2; - unsigned int PKR_XSEL2 : 2; - unsigned int SC_MAP : 2; - unsigned int SC_XSEL : 2; - unsigned int SC_YSEL : 2; - unsigned int : 2; - unsigned int SE_MAP : 2; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int SE_XSEL : 3; - unsigned int SE_YSEL : 3; - } gfx09; - struct { - unsigned int : 26; - unsigned int SE_XSEL : 2; - unsigned int SE_YSEL : 2; - unsigned int : 2; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_RASTER_CONFIG_1 { - struct { - unsigned int SE_PAIR_MAP : 2; - unsigned int : 30; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int SE_PAIR_XSEL : 3; - unsigned int SE_PAIR_YSEL : 3; - unsigned int : 24; - } gfx09; - struct { - unsigned int : 2; - unsigned int SE_PAIR_XSEL : 2; - unsigned int SE_PAIR_YSEL : 2; - unsigned int : 26; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_SCREEN_SCISSOR_BR { - struct { - unsigned int BR_X : 16; - unsigned 
int BR_Y : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_SCREEN_SCISSOR_TL { - struct { - unsigned int TL_X : 16; - unsigned int TL_Y : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_SHADER_CONTROL { - struct { - unsigned int REALIGN_DQUADS_AFTER_N_WAVES : 2; - unsigned int : 30; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int LOAD_COLLISION_WAVEID : 1; - unsigned int LOAD_INTRAWAVE_COLLISION : 1; - unsigned int : 28; - } core; - struct { - unsigned int : 5; - unsigned int WAVE_BREAK_REGION_SIZE : 2; - unsigned int : 25; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 7; - unsigned int DISABLE_OREO_CONFLICT_QUAD : 1; - unsigned int : 24; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TILE_STEERING_CREST_OVERRIDE { - struct { - unsigned int ONE_RB_MODE_ENABLE : 1; - unsigned int SE_SELECT : 2; - unsigned int : 2; - unsigned int RB_SELECT : 2; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int SA_SELECT : 3; - unsigned int : 20; - unsigned int FORCE_TILE_STEERING_OVERRIDE_USE : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TILE_STEERING_OVERRIDE { - struct { - unsigned int ENABLE : 1; - unsigned int : 31; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int NUM_SE : 2; - unsigned int : 2; - unsigned int NUM_RB_PER_SE : 2; - unsigned int : 25; - } most; - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 1; - unsigned int : 11; - } gfx101; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } gfx104Plus; -#endif - struct { - unsigned int : 12; - unsigned int NUM_SC : 2; - 
unsigned int : 2; - unsigned int NUM_RB_PER_SC : 2; - unsigned int : 14; - } gfx10Plus; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } nv24; -#endif - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 1; - unsigned int : 11; - } raphael; - struct { - unsigned int : 20; - unsigned int NUM_PACKER_PER_SC : 2; - unsigned int : 10; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_COUNT { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_H { - struct { - unsigned int X_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_HV_EN { - struct { - unsigned int ENABLE_HV_PRE_SHADER : 1; - unsigned int FORCE_PRE_SHADER_ALL_PIXELS : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_HV_LOCK { - struct { - unsigned int DISABLE_NON_PRIV_WRITES : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_OCCURRENCE { - struct { - unsigned int COUNT : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_TRAP_SCREEN_V { - struct { - unsigned int Y_COORD : 14; - unsigned int : 18; - } bits, bitfields; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_0_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_0_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_1_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_1_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_2_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_2_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_3_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_3_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_4_BR { - struct { - 
unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_4_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_5_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_5_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_6_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_6_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_7_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_7_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_8_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_8_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_9_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_9_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_10_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_10_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_11_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_11_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_12_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
PA_SC_VPORT_SCISSOR_12_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_13_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_13_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_14_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_14_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_15_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_SCISSOR_15_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_0 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_1 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
PA_SC_VPORT_ZMAX_2 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_3 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_4 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_5 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_6 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_7 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_8 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_9 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_10 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_11 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_12 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_13 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_14 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMAX_15 { - struct { - unsigned int VPORT_ZMAX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_0 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_1 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_2 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_3 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_4 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_5 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_6 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_7 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_8 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_9 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_10 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; 
- -union PA_SC_VPORT_ZMIN_11 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_12 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_13 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_14 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_VPORT_ZMIN_15 { - struct { - unsigned int VPORT_ZMIN : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_OVERRIDE_CNTL { - struct { - unsigned int VRS_OVERRIDE_RATE_COMBINER_MODE : 3; - unsigned int : 1; - unsigned int VRS_RATE : 4; - unsigned int : 4; - unsigned int VRS_SURFACE_ENABLE : 1; - unsigned int RATE_HINT_WRITE_BACK_ENABLE : 1; - unsigned int VRS_FEEDBACK_RATE_OVERRIDE : 1; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 20; - unsigned int TB_SYNC_SIM_ID : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_CACHE_CNTL { - struct { - unsigned int BIG_PAGE_RD 
: 1; - unsigned int BIG_PAGE_WR : 1; - unsigned int L1_RD_POLICY : 2; - unsigned int L2_RD_POLICY : 2; - unsigned int L2_WR_POLICY : 2; - unsigned int LLC_RD_NOALLOC : 1; - unsigned int LLC_WR_NOALLOC : 1; - unsigned int NOFILL_RD : 1; - unsigned int NOFILL_WR : 1; - unsigned int PERF_CNTR_EN_RD : 1; - unsigned int PERF_CNTR_EN_WR : 1; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_FEEDBACK_BASE { - struct { - unsigned int BASE_256B : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_FEEDBACK_BASE_EXT { - struct { - unsigned int BASE_256B : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_FEEDBACK_SIZE_XY { - struct { - unsigned int X_MAX : 11; - unsigned int : 5; - unsigned int Y_MAX : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_RATE_SIZE_XY { - struct { - unsigned int X_MAX : 11; - unsigned int : 5; - unsigned int Y_MAX : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_SURFACE_CNTL { - struct { - unsigned int : 6; - unsigned int VRC_CONTEXT_DONE_SYNC_DISABLE : 1; - unsigned int VRS_FEEDBACK_RATE_OVERRIDE : 1; - unsigned int VRC_FLUSH_EVENT_MASK_DISABLE : 5; - unsigned int VRC_PREFETCH_DISABLE : 1; - unsigned int VRC_FLUSH_NO_INV_DISABLE : 1; - 
unsigned int VRC_NONSTALLING_FLUSH_DISABLE : 1; - unsigned int VRC_PARTIAL_FLUSH_DISABLE : 1; - unsigned int VRC_AUTO_FLUSH : 1; - unsigned int VRC_EOP_SYNC_DISABLE : 1; - unsigned int VRC_MAX_TAGS : 7; - unsigned int VRC_EVICT_POINT : 6; - } bits, bitfields; -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 5; - unsigned int VRC_REPROBE_DISABLE : 1; - unsigned int : 26; - } apu11; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 5; - unsigned int VRC_REPROBE_DISABLE : 1; - unsigned int : 26; - } nv32; -#endif -#if CHIP_HDR_NAVI33 - struct { - unsigned int : 5; - unsigned int VRC_REPROBE_DISABLE : 1; - unsigned int : 26; - } nv33; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union PA_SC_VRS_SURFACE_CNTL_1 { - struct { - unsigned int FORCE_SC_VRS_RATE_FINE : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_SHADER_KILL_ENABLE : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_MASK_OPS_ENABLE : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_RATE_16XAA : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_Z_OR_STENCIL : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_PRE_SHADER_DEPTH_COVERAGE_ENABLED : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_POST_DEPTH_IMPORT : 1; - unsigned int FORCE_SC_VRS_RATE_FINE_POPS : 1; - unsigned int USE_ONLY_VRS_RATE_FINE_CFG : 1; - unsigned int : 3; - unsigned int DISABLE_SSAA_VRS_RATE_NORMALIZATION : 1; - unsigned int : 2; - unsigned int DISABLE_PS_ITER_RATE_COMBINER_PASSTHRU_OVERRIDE : 1; - unsigned int : 3; - unsigned int DISABLE_CMASK_RATE_HINT_FORCE_ZERO_OVERRIDE : 1; - unsigned int DISABLE_SSAA_DETAIL_TO_EXPOSED_RATE_CLAMPING : 1; - unsigned int VRS_ECO_SPARE_0 : 1; - unsigned int VRS_ECO_SPARE_1 : 1; - unsigned int VRS_ECO_SPARE_2 : 1; - unsigned int VRS_ECO_SPARE_3 : 1; - unsigned int VRS_ECO_SPARE_4 : 1; - unsigned int VRS_ECO_SPARE_5 : 1; - unsigned int VRS_ECO_SPARE_6 : 1; - unsigned int VRS_ECO_SPARE_7 : 1; - unsigned int VRS_ECO_SPARE_8 : 1; - 
unsigned int VRS_ECO_SPARE_9 : 1; - unsigned int VRS_ECO_SPARE_10 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PA_SC_WINDOW_OFFSET { - struct { - unsigned int WINDOW_X_OFFSET : 16; - unsigned int WINDOW_Y_OFFSET : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_WINDOW_SCISSOR_BR { - struct { - unsigned int BR_X : 15; - unsigned int : 1; - unsigned int BR_Y : 15; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SC_WINDOW_SCISSOR_TL { - struct { - unsigned int TL_X : 15; - unsigned int : 1; - unsigned int TL_Y : 15; - unsigned int WINDOW_OFFSET_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SIDEBAND_REQUEST_DELAYS { - struct { - unsigned int RETRY_DELAY : 16; - unsigned int INITIAL_DELAY : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_STATE_STEREO_X { - struct { - unsigned int STEREO_X_OFFSET : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_STEREO_CNTL { - struct { - unsigned int : 1; - unsigned int STEREO_MODE : 4; - unsigned int RT_SLICE_MODE : 3; - unsigned int : 24; - } most; - struct { - unsigned int : 8; - unsigned int RT_SLICE_OFFSET : 4; - unsigned int : 4; - unsigned int VP_ID_MODE : 3; - unsigned int VP_ID_OFFSET : 4; - unsigned int : 9; - } gfx10Plus; - struct { - unsigned int EN_STEREO : 1; - unsigned int : 7; - unsigned int RT_SLICE_OFFSET : 2; - unsigned int VP_ID_MODE : 3; - unsigned int VP_ID_OFFSET : 4; - unsigned int : 15; - } vg12_Vg20; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_CNTL_STATUS { - struct { - unsigned int : 31; - unsigned int SU_BUSY : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
PA_SU_HARDWARE_SCREEN_OFFSET { - struct { - unsigned int HW_SCREEN_OFFSET_X : 9; - unsigned int : 7; - unsigned int HW_SCREEN_OFFSET_Y : 9; - unsigned int : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_LINE_CNTL { - struct { - unsigned int WIDTH : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_LINE_STIPPLE_CNTL { - struct { - unsigned int LINE_STIPPLE_RESET : 2; - unsigned int EXPAND_FULL_LENGTH : 1; - unsigned int FRACTIONAL_ACCUM : 1; - unsigned int : 28; - } bits, bitfields; - struct { - unsigned int : 4; - unsigned int DIAMOND_ADJUST : 1; - unsigned int : 27; - } gfx09_10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_LINE_STIPPLE_SCALE { - struct { - unsigned int LINE_STIPPLE_SCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_LINE_STIPPLE_VALUE { - struct { - unsigned int LINE_STIPPLE_VALUE : 24; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_OVER_RASTERIZATION_CNTL { - struct { - unsigned int DISCARD_0_AREA_TRIANGLES : 1; - unsigned int DISCARD_0_AREA_LINES : 1; - unsigned int DISCARD_0_AREA_POINTS : 1; - unsigned int DISCARD_0_AREA_RECTANGLES : 1; - unsigned int USE_PROVOKING_ZW : 1; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 16; - unsigned int : 16; - } most; - struct { - unsigned int PERFCOUNTER_HI : 32; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER0_SELECT { - struct { - unsigned int 
PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 16; - unsigned int : 16; - } most; - struct { - unsigned int PERFCOUNTER_HI : 32; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 16; - unsigned int : 16; - } most; - struct { - unsigned int PERFCOUNTER_HI : 32; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER2_LO { - 
struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } gfx09_1xPlus; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 16; - unsigned int : 16; - } most; - struct { - unsigned int PERFCOUNTER_HI : 32; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } gfx09_1xPlus; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 4; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed 
int i32All; - float f32All; -}; - -union PA_SU_POINT_MINMAX { - struct { - unsigned int MIN_SIZE : 16; - unsigned int MAX_SIZE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POINT_SIZE { - struct { - unsigned int HEIGHT : 16; - unsigned int WIDTH : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_BACK_OFFSET { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_BACK_SCALE { - struct { - unsigned int SCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_CLAMP { - struct { - unsigned int CLAMP : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_DB_FMT_CNTL { - struct { - unsigned int POLY_OFFSET_NEG_NUM_DB_BITS : 8; - unsigned int POLY_OFFSET_DB_IS_FLOAT_FMT : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_FRONT_OFFSET { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_POLY_OFFSET_FRONT_SCALE { - struct { - unsigned int SCALE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_PRIM_FILTER_CNTL { - struct { - unsigned int TRIANGLE_FILTER_DISABLE : 1; - unsigned int LINE_FILTER_DISABLE : 1; - unsigned int POINT_FILTER_DISABLE : 1; - unsigned int RECTANGLE_FILTER_DISABLE : 1; - unsigned int TRIANGLE_EXPAND_ENA : 1; - unsigned int LINE_EXPAND_ENA : 1; - unsigned int POINT_EXPAND_ENA : 1; - unsigned int RECTANGLE_EXPAND_ENA : 1; - unsigned int PRIM_EXPAND_CONSTANT : 8; - unsigned int : 14; - unsigned int XMAX_RIGHT_EXCLUSION : 1; - unsigned int YMAX_BOTTOM_EXCLUSION : 1; - 
} bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_SC_MODE_CNTL { - struct { - unsigned int CULL_FRONT : 1; - unsigned int CULL_BACK : 1; - unsigned int FACE : 1; - unsigned int POLY_MODE : 2; - unsigned int POLYMODE_FRONT_PTYPE : 3; - unsigned int POLYMODE_BACK_PTYPE : 3; - unsigned int POLY_OFFSET_FRONT_ENABLE : 1; - unsigned int POLY_OFFSET_BACK_ENABLE : 1; - unsigned int POLY_OFFSET_PARA_ENABLE : 1; - unsigned int : 2; - unsigned int VTX_WINDOW_OFFSET_ENABLE : 1; - unsigned int : 2; - unsigned int PROVOKING_VTX_LAST : 1; - unsigned int PERSP_CORR_DIS : 1; - unsigned int MULTI_PRIM_IB_ENA : 1; - unsigned int RIGHT_TRIANGLE_ALTERNATE_GRADIENT_REF : 1; - unsigned int NEW_QUAD_DECOMPOSITION : 1; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int KEEP_TOGETHER_ENABLE : 1; - unsigned int : 7; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_SMALL_PRIM_FILTER_CNTL { - struct { - unsigned int SMALL_PRIM_FILTER_ENABLE : 1; - unsigned int TRIANGLE_FILTER_DISABLE : 1; - unsigned int LINE_FILTER_DISABLE : 1; - unsigned int POINT_FILTER_DISABLE : 1; - unsigned int RECTANGLE_FILTER_DISABLE : 1; - unsigned int : 27; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int SC_1XMSAA_COMPATIBLE_DISABLE : 1; - unsigned int : 25; - } gfx09_1xPlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_SU_VTX_CNTL { - struct { - unsigned int PIX_CENTER : 1; - unsigned int ROUND_MODE : 2; - unsigned int QUANT_MODE : 3; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_UTCL1_CNTL1 { - struct { - unsigned int FORCE_4K_L2_RESP : 1; - unsigned int GPUVM_64K_DEFAULT : 1; - unsigned int GPUVM_PERM_MODE : 1; - unsigned int RESP_MODE : 2; - unsigned int RESP_FAULT_MODE : 2; - unsigned int CLIENTID : 9; - unsigned int SPARE : 1; - unsigned int 
ENABLE_PUSH_LFIFO : 1; - unsigned int ENABLE_LFIFO_PRI_ARB : 1; - unsigned int REG_INV_VMID : 4; - unsigned int REG_INV_ALL_VMID : 1; - unsigned int REG_INV_TOGGLE : 1; - unsigned int INVALIDATE_ALL_VMID : 1; - unsigned int FORCE_MISS : 1; - unsigned int FORCE_IN_ORDER : 1; - unsigned int REDUCE_FIFO_DEPTH_BY_2 : 2; - unsigned int REDUCE_CACHE_SIZE_BY_2 : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PA_UTCL1_CNTL2 { - struct { - unsigned int SPARE1 : 8; - unsigned int SPARE2 : 1; - unsigned int MTYPE_OVRD_DIS : 1; - unsigned int LINE_VALID : 1; - unsigned int SPARE3 : 1; - unsigned int GPUVM_INV_MODE : 1; - unsigned int ENABLE_SHOOTDOWN_OPT : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int FORCE_GPUVM_INV_ACK : 1; - unsigned int SPARE4 : 2; - unsigned int ENABLE_PERF_EVENT_RD_WR : 1; - unsigned int PERF_EVENT_RD_WR : 1; - unsigned int ENABLE_PERF_EVENT_VMID : 1; - unsigned int PERF_EVENT_VMID : 4; - unsigned int SPARE5 : 1; - unsigned int FORCE_FRAG_2M_TO_64K : 1; - unsigned int RESERVED : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - 
unsigned int : 8; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv31; -#endif - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv31; -#endif - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 16; - 
unsigned int BankSel : 8; - unsigned int : 8; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv31; -#endif - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv31; -#endif - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned 
int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 16; - unsigned int BankSel : 8; - unsigned int : 8; - } nv31; -#endif - struct { - unsigned int : 16; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int : 8; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31 -union PerfMonCtl6 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl7 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl8 { - struct { - unsigned int EventSelect : 8; - unsigned int 
RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl9 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl10 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl11 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtl12 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int BankSel : 8; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PerfMonCtlClk { - struct { - unsigned int : 24; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int 
CtrClkEn : 1; - } most; - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 26; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 16; - unsigned int ClkGate : 1; - unsigned int : 9; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 16; - unsigned int ClkGate : 1; - unsigned int : 9; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 16; - unsigned int ClkGate : 1; - unsigned int : 9; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 16; - unsigned int ClkGate : 1; - unsigned int : 9; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int GlblResetMsk : 13; - unsigned int : 9; - unsigned int ClkGate : 1; - unsigned int SpmEn : 1; - unsigned int : 2; - unsigned int NumCounters : 4; - unsigned int : 2; - } nv31; -#endif - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 26; - } vg12_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv31; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr1_Lo { - struct { - unsigned int Data 
: 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv31; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv31; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr3_Lo { - struct { - unsigned int Data : 
32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv31; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 18; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } nv31; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; 
- } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31 -union PerfMonCtr6_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr6_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr7_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr7_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr8_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr8_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr9_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr9_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union 
PerfMonCtr10_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr10_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr11_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr11_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr12_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 1; - unsigned int ThreshCntEn : 2; - unsigned int ThreshCnt : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31 -union PerfMonCtr12_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_CGTT_MGCG_OVERRIDE { - struct { - unsigned int : 1; - unsigned int RLC_CGTT_SCLK_OVERRIDE : 1; - unsigned int GFXIP_MGCG_OVERRIDE : 1; - unsigned int GFXIP_CGCG_OVERRIDE : 1; - unsigned int GFXIP_CGLS_OVERRIDE : 1; - unsigned int GRBM_CGTT_SCLK_OVERRIDE : 1; - 
unsigned int GFXIP_MGLS_OVERRIDE : 1; - unsigned int GFXIP_GFX3D_CG_OVERRIDE : 1; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int ENABLE_CGTS_LEGACY : 1; - unsigned int : 15; - } most; - struct { - unsigned int CPF_CGTT_SCLK_OVERRIDE : 1; - unsigned int : 7; - unsigned int RESERVED : 24; - } gfx09_0; - struct { - unsigned int RESERVED_0 : 1; - unsigned int : 8; - unsigned int RESERVED_15_9 : 7; - unsigned int : 1; - unsigned int RESERVED_31_17 : 15; - } gfx09_1x; - struct { - unsigned int : 8; - unsigned int GFXIP_FGCG_OVERRIDE : 1; - unsigned int : 23; - } gfx09_1xPlus; - struct { - unsigned int RESERVED_0 : 1; - unsigned int : 16; - unsigned int RESERVED_31_17 : 15; - } gfx10; - struct { - unsigned int : 9; - unsigned int RESERVED_15_9 : 7; - unsigned int : 16; - } gfx10Core; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int RLC_REPEATER_FGCG_OVERRIDE : 1; - unsigned int : 8; - unsigned int GFXIP_REPEATER_FGCG_OVERRIDE : 1; - unsigned int PERFMON_CLOCK_STATE : 1; - unsigned int RESERVED_16_11 : 6; - unsigned int GC_CAC_MGCG_CLK_CNTL : 1; - unsigned int SE_CAC_MGCG_CLK_CNTL : 1; - unsigned int RESERVED_31_19 : 13; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFCOUNTER0_SELECT { - struct { - unsigned int PERFCOUNTER_SELECT : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; 
- -union RLC_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFCOUNTER1_SELECT { - struct { - unsigned int PERFCOUNTER_SELECT : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFMON_CLK_CNTL { - struct { - unsigned int PERFMON_CLOCK_STATE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_PERFMON_CNTL { - struct { - unsigned int PERFMON_STATE : 3; - unsigned int : 7; - unsigned int PERFMON_SAMPLE_ENABLE : 1; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_CTRL { - struct { - unsigned int StrobeResetPerfMonitors : 1; - unsigned int StrobeStartAccumulation : 1; - unsigned int StrobeRearmAccum : 1; - unsigned int : 29; - } bits, bitfields; - struct { - unsigned int : 3; - unsigned int StrobeSpmDoneInt : 1; - unsigned int StrobeAccumDoneInt : 1; - unsigned int StrobeResetAccum : 1; - unsigned int StrobeStartSpm : 4; - unsigned int : 22; - } most; - struct { - unsigned int : 10; - unsigned int RESERVED : 22; - } gfx101; - struct { - unsigned int : 3; - unsigned int StrobeResetSpmBlock : 1; - unsigned int StrobeStartSpm : 4; - unsigned int StrobeRearmSwaAccum : 1; - unsigned int StrobeStartSwa : 1; - unsigned int StrobePerfmonSampleWires : 1; - unsigned int RESERVED : 21; - } gfx103Plus; - struct { - unsigned int : 10; - unsigned int RESERVED : 22; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_CTRLRAM_ADDR { - struct { - unsigned int addr : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int RESERVED : 23; - } apu11; -#endif - struct { - unsigned int : 9; - unsigned int RESERVED : 23; - } gfx101; - struct { - unsigned 
int addr : 11; - unsigned int RESERVED : 21; - } gfx103; -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int addr : 11; - unsigned int RESERVED : 21; - } nv3x; -#endif - struct { - unsigned int addr : 8; - unsigned int RESERVED : 24; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_CTRLRAM_ADDR_OFFSET { - struct { - unsigned int global_offset : 8; - unsigned int spmwithaccum_se_offset : 8; - unsigned int spmwithaccum_global_offset : 8; - unsigned int RESERVED : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_CTRLRAM_DATA { - struct { - unsigned int data : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_DATARAM_32BITCNTRS_REGIONS { - struct { - unsigned int spp_addr_region : 8; - unsigned int swa_addr_region : 8; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_DATARAM_ADDR { - struct { - unsigned int addr : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_DATARAM_DATA { - struct { - unsigned int data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_DATARAM_WRCOUNT { - struct { - unsigned int DataRamWrCount : 19; - unsigned int RESERVED : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_MODE { - struct { - unsigned int EnableAccum : 1; - unsigned int : 31; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int AutoAccumEn : 1; - unsigned int AutoSpmEn : 1; - unsigned int Globals_LoadOverride : 1; - unsigned int SE0_LoadOverride : 1; - unsigned int : 27; - } most; - struct { - unsigned int : 5; - 
unsigned int SE1_LoadOverride : 1; - unsigned int AutoResetPerfmonDisable : 1; - unsigned int RESERVED : 25; - } gfx101; - struct { - unsigned int : 1; - unsigned int EnableSpmWithAccumMode : 1; - unsigned int EnableSPPMode : 1; - unsigned int AutoResetPerfmonDisable : 1; - unsigned int : 1; - unsigned int AutoAccumEn : 1; - unsigned int SwaAutoAccumEn : 1; - unsigned int AutoSpmEn : 1; - unsigned int SwaAutoSpmEn : 1; - unsigned int Globals_LoadOverride : 1; - unsigned int Globals_SwaLoadOverride : 1; - unsigned int SE0_LoadOverride : 1; - unsigned int SE0_SwaLoadOverride : 1; - unsigned int : 19; - } gfx103Plus; - struct { - unsigned int : 4; - unsigned int SwaAutoResetPerfmonDisable : 1; - unsigned int : 27; - } gfx10Vrs; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 13; - unsigned int SE1_LoadOverride : 1; - unsigned int SE1_SwaLoadOverride : 1; - unsigned int SE2_LoadOverride : 1; - unsigned int SE2_SwaLoadOverride : 1; - unsigned int SE3_LoadOverride : 1; - unsigned int SE3_SwaLoadOverride : 1; - unsigned int : 13; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 13; - unsigned int SE1_LoadOverride : 1; - unsigned int SE1_SwaLoadOverride : 1; - unsigned int : 17; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 13; - unsigned int SE1_LoadOverride : 1; - unsigned int SE1_SwaLoadOverride : 1; - unsigned int : 17; - } nv23; -#endif -#if CHIP_HDR_NAVI31 - struct { - unsigned int : 15; - unsigned int SE2_LoadOverride : 1; - unsigned int SE2_SwaLoadOverride : 1; - unsigned int SE3_LoadOverride : 1; - unsigned int SE3_SwaLoadOverride : 1; - unsigned int SE4_LoadOverride : 1; - unsigned int SE4_SwaLoadOverride : 1; - unsigned int SE5_LoadOverride : 1; - unsigned int SE5_SwaLoadOverride : 1; - unsigned int : 9; - } nv31; -#endif -#if CHIP_HDR_NAVI32 - struct { - unsigned int : 15; - unsigned int SE2_LoadOverride : 1; - unsigned int SE2_SwaLoadOverride : 1; - unsigned int : 15; - } nv32; -#endif -#if CHIP_HDR_NAVI31 || 
CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 13; - unsigned int SE1_LoadOverride : 1; - unsigned int SE1_SwaLoadOverride : 1; - unsigned int : 17; - } nv3x; -#endif - struct { - unsigned int : 5; - unsigned int AutoResetPerfmonDisable : 1; - unsigned int RESERVED : 26; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_SAMPLES_REQUESTED { - struct { - unsigned int SamplesRequested : 8; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int RESERVED : 24; - } gfx101; - struct { - unsigned int : 8; - unsigned int RESERVED : 24; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_STATUS { - struct { - unsigned int NumbSamplesCompleted : 8; - unsigned int AccumDone : 1; - unsigned int SpmDone : 1; - unsigned int AccumOverflow : 1; - unsigned int AccumArmed : 1; - unsigned int SequenceInProgress : 1; - unsigned int FinalSequenceInProgress : 1; - unsigned int AllFifosEmpty : 1; - unsigned int FSMIsIdle : 1; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int RESERVED : 16; - } gfx101; - struct { - unsigned int : 20; - unsigned int AllSegsDone : 1; - unsigned int RearmSwaPending : 1; - unsigned int RearmSppPending : 1; - unsigned int : 9; - } gfx103CorePlus; - struct { - unsigned int : 16; - unsigned int SwaAccumDone : 1; - unsigned int SwaSpmDone : 1; - unsigned int SwaAccumOverflow : 1; - unsigned int SwaAccumArmed : 1; - unsigned int : 12; - } gfx103Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 23; - unsigned int RESERVED : 9; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } nv22; -#endif 
-#if CHIP_HDR_NAVI23 - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } nv24; -#endif - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } raphael; - struct { - unsigned int : 23; - unsigned int MultiSampleAborted : 1; - unsigned int RESERVED : 8; - } rembrandt; - struct { - unsigned int : 16; - unsigned int RESERVED : 16; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_SWA_DATARAM_ADDR { - struct { - unsigned int addr : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_SWA_DATARAM_DATA { - struct { - unsigned int data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_ACCUM_THRESHOLD { - struct { - unsigned int Threshold : 16; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int RESERVED : 16; - } gfx101; - struct { - unsigned int : 16; - unsigned int RESERVED : 16; - } rv2x_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_CB_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
RLC_SPM_CPC_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_CPF_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_CPG_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_DB_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_DESER_START_SKEW { - struct { - unsigned int DESER_START_SKEW : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GDS_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GFXCLOCK_HIGHCOUNT { - struct { - unsigned int GFXCLOCK_HIGHCOUNT : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GFXCLOCK_LOWCOUNT { - struct { - unsigned int GFXCLOCK_LOWCOUNT : 32; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union RLC_SPM_GLB_SAMPLEDELAY_IND_ADDR { - struct { - unsigned int GLB_SAMPLEDELAY_INDEX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GLB_SAMPLEDELAY_IND_DATA { - struct { - unsigned int data : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GLOBALS_MUXSEL_SKEW { - struct { - unsigned int GLOBALS_MUXSEL_SKEW : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GLOBALS_SAMPLE_SKEW { - struct { - unsigned int GLOBALS_SAMPLE_SKEW : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_GLOBAL_DELAY_IND_ADDR { - struct { - unsigned int ADDR : 12; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_GLOBAL_DELAY_IND_DATA { - struct { - unsigned int DATA : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_GLOBAL_MUXSEL_ADDR { - struct { - unsigned int PERFMON_SEL_ADDR : 32; - } gfx09; - struct { - unsigned int PERFMON_SEL_ADDR : 8; - unsigned int RESERVED : 24; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int ADDR : 12; - unsigned int : 20; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_GLOBAL_MUXSEL_ADDR_OFFSET { - struct { - unsigned int OFFSET : 16; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
RLC_SPM_GLOBAL_MUXSEL_DATA { - struct { - unsigned int PERFMON_SEL_DATA : 32; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SEL0 : 16; - unsigned int SEL1 : 16; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_IA_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_INT_CNTL { - struct { - unsigned int RLC_SPM_INT_CNTL : 1; - unsigned int RESERVED : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_INT_INFO_1 { - struct { - unsigned int INTERRUPT_INFO_1 : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_INT_INFO_2 { - struct { - unsigned int INTERRUPT_INFO_2 : 16; - unsigned int INTERRUPT_ID : 8; - unsigned int RESERVED : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_INT_STATUS { - struct { - unsigned int RLC_SPM_INT_STATUS : 1; - unsigned int RESERVED : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_MC_CNTL { - struct { - unsigned int RLC_SPM_VMID : 4; - unsigned int : 28; - } bits, bitfields; -#if CHIP_HDR_PHOENIX1 - struct { - unsigned int : 13; - unsigned int RESERVED_2 : 1; - unsigned int : 18; - } apu11; -#endif - struct { - unsigned int : 4; - unsigned int RLC_SPM_POLICY : 1; - unsigned int RLC_SPM_PERF_CNTR : 1; - unsigned int RLC_SPM_FED : 1; - unsigned int RLC_SPM_MTYPE_OVER : 1; - unsigned int RLC_SPM_MTYPE : 2; - unsigned int RESERVED : 22; - } gfx09; - struct { - unsigned int : 13; - unsigned int RESERVED_2 : 1; - unsigned int : 2; - unsigned int RESERVED : 16; - } gfx101; - struct { - unsigned int : 16; - unsigned int RESERVED_3 : 2; - 
unsigned int : 14; - } gfx103CorePlus; - struct { - unsigned int : 20; - unsigned int RESERVED : 12; - } gfx103PlusExclusive; - struct { - unsigned int : 12; - unsigned int RLC_SPM_BC : 1; - unsigned int : 1; - unsigned int RLC_SPM_VOL : 1; - unsigned int : 17; - } gfx10CorePlus; - struct { - unsigned int : 4; - unsigned int RLC_SPM_POLICY : 2; - unsigned int RLC_SPM_PERF_CNTR : 1; - unsigned int RLC_SPM_FED : 1; - unsigned int RLC_SPM_MTYPE_OVER : 1; - unsigned int RLC_SPM_MTYPE : 3; - unsigned int : 3; - unsigned int RLC_SPM_NOFILL : 1; - unsigned int : 16; - } gfx10Plus; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 13; - unsigned int RLC_SPM_RO : 1; - unsigned int : 4; - unsigned int RLC_SPM_LLC_NOALLOC : 1; - unsigned int RLC_SPM_LLC_NOALLOC_OVER : 1; - unsigned int : 12; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 13; - unsigned int RLC_SPM_RO : 1; - unsigned int : 4; - unsigned int RLC_SPM_LLC_NOALLOC : 1; - unsigned int RLC_SPM_LLC_NOALLOC_OVER : 1; - unsigned int : 12; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 13; - unsigned int RLC_SPM_RO : 1; - unsigned int : 4; - unsigned int RLC_SPM_LLC_NOALLOC : 1; - unsigned int RLC_SPM_LLC_NOALLOC_OVER : 1; - unsigned int : 12; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 13; - unsigned int RLC_SPM_RO : 1; - unsigned int : 4; - unsigned int RLC_SPM_LLC_NOALLOC : 1; - unsigned int RLC_SPM_LLC_NOALLOC_OVER : 1; - unsigned int : 12; - } nv24; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 13; - unsigned int RLC_SPM_RO : 1; - unsigned int : 4; - unsigned int RLC_SPM_LLC_NOALLOC : 1; - unsigned int RLC_SPM_LLC_NOALLOC_OVER : 1; - unsigned int : 12; - } nv3x; -#endif - struct { - unsigned int : 13; - unsigned int RESERVED_2 : 1; - unsigned int : 18; - } raphael; - struct { - unsigned int : 13; - unsigned int RESERVED_2 : 1; - unsigned int : 18; - } rembrandt; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_MODE { - struct { - unsigned int MODE : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_PAUSE { - struct { - unsigned int PAUSE : 1; - unsigned int PAUSED : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_PA_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_CNTL { - struct { - unsigned int : 12; - unsigned int PERFMON_RING_MODE : 2; - unsigned int : 2; - unsigned int PERFMON_SAMPLE_INTERVAL : 16; - } bits, bitfields; - struct { - unsigned int : 14; - unsigned int RESERVED : 2; - unsigned int : 16; - } gfx09; - struct { - unsigned int : 14; - unsigned int RESERVED : 2; - unsigned int : 16; - } gfx10Core; - struct { - unsigned int RESERVED1 : 12; - unsigned int : 20; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 14; - unsigned int DISABLE_GFXCLOCK_COUNT : 1; - unsigned int RESERVED : 1; - unsigned int : 16; - } gfx11; -#endif - struct { - unsigned int RLC_DBG_STRM_EN : 1; - unsigned int RLC_DBG_STRM_EXT_SAMPLE : 1; - unsigned int RESERVED1 : 10; - unsigned int : 20; - } raven; - struct { - unsigned int RESERVED1 : 12; - unsigned int : 20; - } vega; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_GLB_SEGMENT_SIZE { - struct { - unsigned int PERFMON_SEGMENT_SIZE : 8; - unsigned int GLOBAL_NUM_LINE : 8; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union RLC_SPM_PERFMON_RING_BASE_HI { - struct { - unsigned int RING_BASE_HI : 16; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_RING_BASE_LO { - struct { - unsigned int RING_BASE_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_RING_SIZE { - struct { - unsigned int RING_BASE_SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SAMPLE_DELAY_MAX { - struct { - unsigned int PERFMON_MAX_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE { - struct { - unsigned int SE0_NUM_LINE : 8; - unsigned int SE1_NUM_LINE : 8; - unsigned int SE2_NUM_LINE : 8; - unsigned int SE3_NUM_LINE : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SEGMENT_SIZE { - struct { - unsigned int PERFMON_SEGMENT_SIZE : 8; - unsigned int RESERVED1 : 3; - unsigned int GLOBAL_NUM_LINE : 5; - unsigned int SE0_NUM_LINE : 5; - unsigned int SE1_NUM_LINE : 5; - unsigned int SE2_NUM_LINE : 5; - unsigned int RESERVED : 1; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int TOTAL_NUM_SEGMENT : 16; - unsigned int GLOBAL_NUM_SEGMENT : 8; - unsigned int SE_NUM_SEGMENT : 8; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SWA_GLB_SEGMENT_SIZE { - struct { - unsigned int PERFMON_SEGMENT_SIZE : 8; - unsigned int GLOBAL_NUM_LINE : 8; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SWA_SE3TO0_SEGMENT_SIZE { - struct { - unsigned int SE0_NUM_LINE : 8; - unsigned int SE1_NUM_LINE 
: 8; - unsigned int SE2_NUM_LINE : 8; - unsigned int SE3_NUM_LINE : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_PERFMON_SWA_SEGMENT_SIZE { - struct { - unsigned int PERFMON_SEGMENT_SIZE : 8; - unsigned int RESERVED1 : 3; - unsigned int GLOBAL_NUM_LINE : 5; - unsigned int SE0_NUM_LINE : 5; - unsigned int SE1_NUM_LINE : 5; - unsigned int SE2_NUM_LINE : 5; - unsigned int RESERVED : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_RING_RDPTR { - struct { - unsigned int PERFMON_RING_RDPTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_RING_WRPTR { - struct { - unsigned int RESERVED : 5; - unsigned int PERFMON_RING_WRPTR : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_RMI_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_CMD { - struct { - unsigned int CMD : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_CMD_ACK { - struct { - unsigned int SE0_ACK : 1; - unsigned int SE1_ACK : 1; - unsigned int SE2_ACK : 1; - unsigned int SE3_ACK : 1; - unsigned int SE4_ACK : 1; - unsigned int SE5_ACK : 1; - unsigned int SE6_ACK : 1; - unsigned int SE7_ACK : 1; - unsigned int SPM_ACK : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_REQ_DATA_HI { - struct { - unsigned int 
DATA : 12; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_REQ_DATA_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_REQ_OP { - struct { - unsigned int OP : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_RET_DATA { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_RSPM_RET_OP { - struct { - unsigned int OP : 4; - unsigned int : 4; - unsigned int VALID : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_SAMPLE_CNT { - struct { - unsigned int COUNT : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SC_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SEGMENT_THRESHOLD { - struct { - unsigned int NUM_SEGMENT_THRESHOLD : 32; - } gfx09; - struct { - unsigned int NUM_SEGMENT_THRESHOLD : 8; - unsigned int RESERVED : 24; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_DELAY_IND_ADDR { - struct { - unsigned int ADDR : 12; - unsigned int : 20; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_DELAY_IND_DATA { - struct { - unsigned int DATA : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_SE_MUXSEL_ADDR { - struct { - unsigned int PERFMON_SEL_ADDR : 32; - } gfx09; - struct { - unsigned int PERFMON_SEL_ADDR : 9; - unsigned int RESERVED : 23; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int ADDR : 12; - unsigned int : 20; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SE_MUXSEL_ADDR_OFFSET { - struct { - unsigned int OFFSET : 16; - unsigned int RESERVED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SE_MUXSEL_DATA { - struct { - unsigned int PERFMON_SEL_DATA : 32; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SEL0 : 16; - unsigned int SEL1 : 16; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SE_MUXSEL_SKEW { - struct { - unsigned int SE_MUXSEL_SKEW : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_RSPM_REQ_DATA_HI { - struct { - unsigned int DATA : 12; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_RSPM_REQ_DATA_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_RSPM_REQ_OP { - struct { - unsigned int OP : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_RSPM_RET_DATA { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SE_RSPM_RET_OP { - struct { - unsigned int OP : 4; - unsigned int : 4; - unsigned int VALID : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_SE_SAMPLEDELAY_IND_ADDR { - struct { - unsigned int SE_SAMPLEDELAY_INDEX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SE_SAMPLEDELAY_IND_DATA { - struct { - unsigned int data : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SE_SAMPLE_SKEW { - struct { - unsigned int SE_SAMPLE_SKEW : 7; - unsigned int RESERVED : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_SPARE { - struct { - unsigned int SPARE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_SPI_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_SQG_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union RLC_SPM_STATUS { - struct { - unsigned int CTL_BUSY : 1; - unsigned int RSPM_REG_BUSY : 1; - unsigned int SPM_RSPM_BUSY : 1; - unsigned int SPM_RSPM_IO_BUSY : 1; - unsigned int SE_RSPM_IO_BUSY : 8; - unsigned int : 3; - unsigned int ACCUM_BUSY : 1; - unsigned int FSM_MASTER_STATE : 4; - unsigned int FSM_MEMORY_STATE : 4; - unsigned int CTL_REQ_STATE : 2; - unsigned int CTL_RET_STATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union RLC_SPM_SX_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_TA_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_TCA_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_TCC_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_TCP_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_TD_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_THREAD_TRACE_CTRL { - struct { - unsigned int THREAD_TRACE_INT_EN 
: 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_UTCL1_CNTL { - struct { - unsigned int XNACK_REDO_TIMER_CNT : 20; - unsigned int : 4; - unsigned int DROP_MODE : 1; - unsigned int BYPASS : 1; - unsigned int INVALIDATE : 1; - unsigned int FRAG_LIMIT_MODE : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int : 1; - unsigned int RESERVED : 2; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int FORCE_SD_VMID_DIRTY : 1; - unsigned int : 2; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_UTCL1_ERROR_1 { - struct { - unsigned int Translated_ReqError : 2; - unsigned int Translated_ReqErrorVmid : 4; - unsigned int Translated_ReqErrorAddr_MSB : 4; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_UTCL1_ERROR_2 { - struct { - unsigned int Translated_ReqErrorAddr_LSB : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_VGT_PERFMON_SAMPLE_DELAY { - struct { - unsigned int PERFMON_SAMPLE_DELAY : 8; - unsigned int RESERVED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_VIRT_CTRL { - struct { - unsigned int PauseSpmSamplingRequest : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RLC_SPM_VIRT_STATUS { - struct { - unsigned int SpmSamplingPaused : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union RMI_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 1; - unsigned int PERF_SEL1 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL2 : 9; - unsigned int : 1; - unsigned int PERF_SEL3 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER1_SELECT { - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union RMI_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER2_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 1; - unsigned int PERF_SEL1 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER2_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL2 : 9; - unsigned int : 1; - unsigned int PERF_SEL3 : 9; - unsigned int : 13; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RMI_PERFCOUNTER3_SELECT { - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 9; - unsigned int : 23; - } most; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; 
- float f32All; -}; - -union RMI_PERF_COUNTER_CNTL { - struct { - unsigned int TRANS_BASED_PERF_EN_SEL : 2; - unsigned int EVENT_BASED_PERF_EN_SEL : 2; - unsigned int TC_PERF_EN_SEL : 2; - unsigned int PERF_EVENT_WINDOW_MASK0 : 2; - unsigned int : 2; - unsigned int PERF_COUNTER_CID : 4; - unsigned int PERF_COUNTER_VMID : 5; - unsigned int PERF_COUNTER_BURST_LENGTH_THRESHOLD : 6; - unsigned int PERF_SOFT_RESET : 1; - unsigned int PERF_CNTR_SPM_SEL : 1; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int PERF_EVENT_WINDOW_MASK1 : 2; - unsigned int : 22; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER2_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER3_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 
16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union RPB_PERF_COUNTER_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 2; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_MISC_CNTL { - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCNT_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER0_RESULT { - struct { - unsigned int PERF_COUNT : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER1_RESULT { - struct { - unsigned int PERF_COUNT : 32; - } most; - - unsigned int u32All; - signed int i32All; - 
float f32All; -}; - -union SDMA0_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFCOUNTER_TAG_DELAY_RANGE { - struct { - unsigned int RANGE_LOW : 14; - unsigned int RANGE_HIGH : 14; - unsigned int SELECT_RW : 1; - unsigned int : 3; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA0_PERFMON_CNTL { - struct { - unsigned int PERF_ENABLE0 : 1; - unsigned int PERF_CLEAR0 : 1; - unsigned int PERF_SEL0 : 8; - unsigned int PERF_ENABLE1 : 1; - unsigned int PERF_CLEAR1 : 1; - unsigned int PERF_SEL1 : 8; - unsigned int : 12; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_MISC_CNTL { -#if CHIP_HDR_NAVI21 - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_PERFCOUNTER0_CFG { -#if CHIP_HDR_NAVI21 - 
struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_PERFCOUNTER1_CFG { -#if CHIP_HDR_NAVI21 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int PERF_SEL : 8; - 
unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_PERFCOUNTER_HI { -#if CHIP_HDR_NAVI21 - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_PERFCOUNTER_LO { -#if CHIP_HDR_NAVI21 - struct { - unsigned int COUNTER_LO : 32; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int COUNTER_LO : 32; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int COUNTER_LO : 32; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int COUNTER_LO : 32; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -union SDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL { -#if CHIP_HDR_NAVI21 - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 
1; - unsigned int : 5; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } nv23; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SDMA1_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER0_RESULT { - struct { - unsigned int PERF_COUNT : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - 
- unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER1_RESULT { - struct { - unsigned int PERF_COUNT : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFCOUNTER_TAG_DELAY_RANGE { - struct { - unsigned int RANGE_LOW : 14; - unsigned int RANGE_HIGH : 14; - unsigned int SELECT_RW : 1; - unsigned int : 3; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SDMA1_PERFMON_CNTL { - struct { - unsigned int PERF_ENABLE0 : 1; - unsigned int PERF_CLEAR0 : 1; - unsigned int PERF_SEL0 : 8; - unsigned int PERF_ENABLE1 : 1; - unsigned int PERF_CLEAR1 : 1; - unsigned int PERF_SEL1 : 8; - unsigned int : 12; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_MISC_CNTL { - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - 
unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCNT_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int 
PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA2_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_MISC_CNTL { - struct { - unsigned int CMD_OP : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_PERFCOUNTER0_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_PERFCOUNTER1_CFG { - struct { - unsigned int PERF_SEL : 8; - unsigned int PERF_SEL_END : 8; - unsigned int : 8; - unsigned int PERF_MODE : 4; - unsigned int ENABLE : 1; - unsigned int CLEAR : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_PERFCOUNTER_HI { - struct { - unsigned int COUNTER_HI : 16; - unsigned int COMPARE_VALUE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_PERFCOUNTER_LO { - struct { - unsigned int COUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCNT_PERFCOUNTER_RSLT_CNTL { - struct { - unsigned int PERF_COUNTER_SELECT : 4; - unsigned int : 4; - unsigned int START_TRIGGER : 8; - unsigned int STOP_TRIGGER : 8; - unsigned int ENABLE_ANY : 1; - unsigned int CLEAR_ALL : 1; - unsigned int STOP_ALL_ON_SATURATE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union 
SDMA3_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI21 -union SDMA3_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_ARB_CNTL_0 { - struct { - unsigned int EXP_ARB_COL_WT : 4; - unsigned int EXP_ARB_POS_WT : 4; - unsigned int EXP_ARB_GDS_WT : 4; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_ARB_CYCLES_0 { - struct { - unsigned int TS0_DURATION : 16; - unsigned int TS1_DURATION : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_ARB_CYCLES_1 { - struct { - unsigned int TS2_DURATION : 16; - unsigned int TS3_DURATION : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_ARB_PRIORITY { - struct { - unsigned int 
PIPE_ORDER_TS0 : 3; - unsigned int PIPE_ORDER_TS1 : 3; - unsigned int PIPE_ORDER_TS2 : 3; - unsigned int PIPE_ORDER_TS3 : 3; - unsigned int TS0_DUR_MULT : 2; - unsigned int TS1_DUR_MULT : 2; - unsigned int TS2_DUR_MULT : 2; - unsigned int TS3_DUR_MULT : 2; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_ATTRIBUTE_RING_BASE { - struct { - unsigned int BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_ATTRIBUTE_RING_SIZE { - struct { - unsigned int MEM_SIZE : 8; - unsigned int : 8; - unsigned int BIG_PAGE : 1; - unsigned int L1_POLICY : 2; - unsigned int L2_POLICY : 2; - unsigned int LLC_NOALLOC : 1; - unsigned int GL1_PERF_COUNTER_DISABLE : 1; - unsigned int : 9; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_BARYC_CNTL { - struct { - unsigned int PERSP_CENTER_CNTL : 1; - unsigned int : 3; - unsigned int PERSP_CENTROID_CNTL : 1; - unsigned int : 3; - unsigned int LINEAR_CENTER_CNTL : 1; - unsigned int : 3; - unsigned int LINEAR_CENTROID_CNTL : 1; - unsigned int : 3; - unsigned int POS_FLOAT_LOCATION : 2; - unsigned int : 2; - unsigned int POS_FLOAT_ULC : 1; - unsigned int : 3; - unsigned int FRONT_FACE_ALL_BITS : 1; - unsigned int : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_BARYC_SSAA_CNTL { - struct { - unsigned int CENTER_SSAA_MODE : 1; - unsigned int CENTROID_SSAA_MODE : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_COMPUTE_QUEUE_RESET { - struct { - unsigned int RESET : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union SPI_COMPUTE_WF_CTX_SAVE { - struct { - unsigned int INITIATE : 1; - unsigned int GDS_INTERRUPT_EN : 1; - unsigned int DONE_INTERRUPT_EN : 1; - unsigned int : 27; - unsigned int GDS_REQ_BUSY : 1; - unsigned int SAVE_BUSY : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_COMPUTE_WF_CTX_SAVE_STATUS { - struct { - unsigned int PIPE0_QUEUE0_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE1_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE2_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE3_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE4_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE5_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE6_SAVE_BUSY : 1; - unsigned int PIPE0_QUEUE7_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE0_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE1_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE2_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE3_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE4_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE5_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE6_SAVE_BUSY : 1; - unsigned int PIPE1_QUEUE7_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE0_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE1_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE2_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE3_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE4_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE5_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE6_SAVE_BUSY : 1; - unsigned int PIPE2_QUEUE7_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE0_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE1_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE2_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE3_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE4_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE5_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE6_SAVE_BUSY : 1; - unsigned int PIPE3_QUEUE7_SAVE_BUSY : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_CONFIG_CNTL { - struct { - unsigned int GPR_WRITE_PRIORITY : 
21; - unsigned int EXP_PRIORITY_ORDER : 3; - unsigned int ENABLE_SQG_TOP_EVENTS : 1; - unsigned int ENABLE_SQG_BOP_EVENTS : 1; - unsigned int : 2; - unsigned int ALLOC_ARB_LRU_ENA : 1; - unsigned int EXP_ARB_LRU_ENA : 1; - unsigned int PS_PKR_PRIORITY_CNTL : 2; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int RSRC_MGMT_RESET : 1; - unsigned int : 5; - } most; - struct { - unsigned int : 27; - unsigned int TTRACE_STALL_ALL : 1; - unsigned int : 4; - } gfx09_10; - struct { - unsigned int : 26; - unsigned int FORCE_HALF_RATE_PC_EXP : 1; - unsigned int : 5; - } gfx103Derivative; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_CNTL_1 { - struct { - unsigned int VTX_DONE_DELAY : 4; - unsigned int INTERP_ONE_PRIM_PER_ROW : 1; - unsigned int : 2; - unsigned int PC_LIMIT_STRICT : 1; - unsigned int : 6; - unsigned int CSC_PWR_SAVE_DISABLE : 1; - unsigned int CSG_PWR_SAVE_DISABLE : 1; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int CRC_SIMD_ID_WADDR_DISABLE : 1; - unsigned int LBPW_CU_CHK_MODE : 1; - unsigned int : 22; - } most; - struct { - unsigned int : 10; - unsigned int LBPW_CU_CHK_CNT : 4; - unsigned int : 18; - } core; - struct { - unsigned int : 5; - unsigned int BATON_RESET_DISABLE : 1; - unsigned int PC_LIMIT_ENABLE : 1; - unsigned int : 9; - unsigned int PC_LIMIT_SIZE : 16; - } gfx09; - struct { - unsigned int : 22; - unsigned int RESERVED : 10; - } gfx101; - struct { - unsigned int : 23; - unsigned int RESERVED : 9; - } gfx103; - struct { - unsigned int : 22; - unsigned int SA_SCREEN_MAP : 1; - unsigned int : 9; - } gfx103Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 8; - unsigned int PS_GROUP_TIMEOUT_MODE : 1; - unsigned int : 14; - unsigned int PS_GROUP_TIMEOUT : 9; - } gfx104Plus; -#endif - struct { - unsigned int : 5; - unsigned int PC_LIMIT_ENABLE : 2; - unsigned int : 9; - unsigned int 
MAX_VTX_SYNC_CNT : 5; - unsigned int EN_USER_ACCUM : 1; - unsigned int : 10; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int OREO_EXPALLOC_STALL : 1; - unsigned int : 22; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_CNTL_1_REMAP { - struct { - unsigned int RESERVED : 32; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int RESERVED : 32; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int RESERVED : 32; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int RESERVED : 32; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int RESERVED : 32; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_CNTL_2 { - struct { - unsigned int CONTEXT_SAVE_WAIT_GDS_REQUEST_CYCLE_OVHD : 4; - unsigned int CONTEXT_SAVE_WAIT_GDS_GRANT_CYCLE_OVHD : 4; - unsigned int : 24; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 8; - unsigned int PWS_CSG_WAIT_DISABLE : 1; - unsigned int PWS_HS_WAIT_DISABLE : 1; - unsigned int PWS_GS_WAIT_DISABLE : 1; - unsigned int PWS_PS_WAIT_DISABLE : 1; - unsigned int CSC_HALT_ACK_DELAY : 5; - unsigned int : 15; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_CNTL_2_REMAP { - struct { - unsigned int RESERVED : 32; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int RESERVED : 32; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int RESERVED : 32; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int RESERVED : 32; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int RESERVED : 32; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_CNTL_REMAP { - struct { - unsigned int RESERVED : 32; - } gfx101; 
-#if CHIP_HDR_NAVI21 - struct { - unsigned int RESERVED : 32; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int RESERVED : 32; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int RESERVED : 32; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int RESERVED : 32; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CONFIG_PS_CU_EN { - struct { - unsigned int ENABLE : 1; - unsigned int PKR0_CU_EN : 15; - unsigned int PKR1_CU_EN : 16; - } gfx09; - struct { - unsigned int PKR_OFFSET : 4; - unsigned int : 28; - } gfx103CorePlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 4; - unsigned int PKR2_OFFSET : 4; - unsigned int PKR3_OFFSET : 4; - unsigned int : 20; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSG_PIPE_CONTROL { - struct { - unsigned int HYSTERESIS_CNT : 13; - unsigned int : 19; - } gfx103CorePlus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_0 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_1 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_2 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_3 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union SPI_CSQ_WF_ACTIVE_COUNT_4 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_5 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_6 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_COUNT_7 { - struct { - unsigned int COUNT : 11; - unsigned int : 5; - unsigned int EVENTS : 11; - unsigned int : 5; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CSQ_WF_ACTIVE_STATUS { - struct { - unsigned int ACTIVE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_CS_CRAWLER_CONFIG { - struct { - unsigned int CSG_DEPTH : 6; - unsigned int CSC_DEPTH : 6; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_DSM_CNTL { - struct { - unsigned int SPI_SR_MEM_DSM_IRRITATOR_DATA : 2; - unsigned int SPI_SR_MEM_ENABLE_SINGLE_WRITE : 1; - unsigned int : 29; - } bits, bitfields; - struct { - unsigned int : 3; - unsigned int UNUSED : 29; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_DSM_CNTL2 { - struct { - unsigned int SPI_SR_MEM_ENABLE_ERROR_INJECT : 2; - unsigned int SPI_SR_MEM_SELECT_INJECT_DELAY : 1; - unsigned int : 29; - } bits, bitfields; - struct { - unsigned int : 4; - unsigned int SPI_SR_MEM_INJECT_DELAY : 6; - unsigned int UNUSED : 22; - } gfx09; - struct { - unsigned int : 3; - unsigned int SPI_SR_MEM_INJECT_DELAY : 6; - unsigned int : 23; - } gfx10Plus; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union SPI_EDC_CNT { - struct { - unsigned int SPI_SR_MEM_SED_COUNT : 2; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_EXP_THROTTLE_CTRL { - struct { - unsigned int ENABLE : 1; - unsigned int PERIOD : 4; - unsigned int UPSTEP : 4; - unsigned int DOWNSTEP : 4; - unsigned int LOW_STALL_MON_HIST_COUNT : 3; - unsigned int HIGH_STALL_MON_HIST_COUNT : 3; - unsigned int EXP_STALL_THRESHOLD : 7; - unsigned int SKEW_COUNT : 3; - unsigned int THROTTLE_RESET : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_FEATURE_CTRL { - struct { - unsigned int : 28; - unsigned int TUNNELING_WAVE_LIMIT : 4; - } most; - struct { - unsigned int CU_LOCKING_FAIRNESS_DISABLE : 1; - unsigned int : 1; - unsigned int ALLOCATION_RATE_THROTTLE_THRESHOLD : 5; - unsigned int ACTIVE_HARD_LOCK_LIMIT : 5; - unsigned int LR_IMBALANCE_THRESHOLD : 6; - unsigned int RA_PIPE_DEPTH_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD : 8; - unsigned int : 4; - } gfx101; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int TUNNELING_WAVE_LIMIT : 4; - unsigned int RA_PROBE_IGNORE : 1; - unsigned int PS_THROTTLE_MAX_WAVE_LIMIT : 6; - unsigned int RA_PROBE_SKEW_WIF_CTRL : 2; - unsigned int RA_PROBE_SKEW_OOO_CTRL : 1; - unsigned int RA_PROBE_SKEW_DISABLE : 1; - unsigned int : 17; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int CU_LOCKING_FAIRNESS_DISABLE : 1; - unsigned int : 1; - unsigned int ALLOCATION_RATE_THROTTLE_THRESHOLD : 5; - unsigned int ACTIVE_HARD_LOCK_LIMIT : 5; - unsigned int LR_IMBALANCE_THRESHOLD : 6; - unsigned int RA_PIPE_DEPTH_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD : 8; - unsigned int 
: 4; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int CU_LOCKING_FAIRNESS_DISABLE : 1; - unsigned int : 1; - unsigned int ALLOCATION_RATE_THROTTLE_THRESHOLD : 5; - unsigned int ACTIVE_HARD_LOCK_LIMIT : 5; - unsigned int LR_IMBALANCE_THRESHOLD : 6; - unsigned int RA_PIPE_DEPTH_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD_ALLOC_STALL_EN : 1; - unsigned int BUS_ACTIVITY_THRESHOLD : 8; - unsigned int : 4; - } nv22; -#endif - struct { - unsigned int TUNNELING_WAVE_LIMIT : 4; - unsigned int : 28; - } raphael; - struct { - unsigned int TUNNELING_WAVE_LIMIT : 4; - unsigned int : 28; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_GDS_CREDITS { - struct { - unsigned int DS_DATA_CREDITS : 8; - unsigned int DS_CMD_CREDITS : 8; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int UNUSED : 16; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_GFX_CNTL { - struct { - unsigned int RESET_COUNTS : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_GFX_CRAWLER_CONFIG { - struct { - unsigned int PS_DEPTH : 5; - unsigned int GS_DEPTH : 6; - unsigned int HS_DEPTH : 6; - unsigned int : 15; - } bits, bitfields; - struct { - unsigned int : 17; - unsigned int VS_DEPTH : 5; - unsigned int : 10; - } gfx103Derivative; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 17; - unsigned int PS_ALLOC_DEPTH : 5; - unsigned int PS_LDS_DONE_DEPTH : 3; - unsigned int PS_LDS_DONE_CNTL : 1; - unsigned int RA_PSWAVE_CREDITS : 3; - unsigned int : 3; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_GFX_SCRATCH_BASE_HI { - struct { - unsigned int DATA : 8; - unsigned int : 24; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_GFX_SCRATCH_BASE_LO { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_GS_THROTTLE_CNTL1 { - struct { - unsigned int PH_POLL_INTERVAL : 4; - unsigned int PH_THROTTLE_BASE : 4; - unsigned int PH_THROTTLE_STEP_SIZE : 4; - unsigned int SPI_VGPR_THRESHOLD : 4; - unsigned int SPI_LDS_THRESHOLD : 4; - unsigned int SPI_POLL_INTERVAL : 4; - unsigned int SPI_THROTTLE_BASE : 4; - unsigned int SPI_THROTTLE_STEP_SIZE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_GS_THROTTLE_CNTL2 { - struct { - unsigned int SPI_THROTTLE_MODE : 2; - unsigned int GRP_LIFETIME_THRESHOLD : 4; - unsigned int GRP_LIFETIME_THRESHOLD_FACTOR : 2; - unsigned int GRP_LIFETIME_PENALTY1 : 3; - unsigned int GRP_LIFETIME_PENALTY2 : 3; - unsigned int PS_STALL_THRESHOLD : 2; - unsigned int PH_MODE : 1; - unsigned int RESERVED : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_IND_DATA { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_IND_INDEX { - struct { - unsigned int INDEX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_INTERP_CONTROL_0 { - struct { - unsigned int FLAT_SHADE_ENA : 1; - unsigned int PNT_SPRITE_ENA : 1; - unsigned int PNT_SPRITE_OVRD_X : 3; - unsigned int PNT_SPRITE_OVRD_Y : 3; - unsigned int PNT_SPRITE_OVRD_Z : 3; - unsigned int PNT_SPRITE_OVRD_W : 3; - unsigned int PNT_SPRITE_TOP_1 : 1; - unsigned int : 17; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_CTR_CTRL { - struct { - unsigned int LOAD : 1; - unsigned int WAVES_SELECT : 2; - unsigned int CLEAR_ON_READ : 1; - unsigned int RESET_COUNTS : 1; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_CU_MASK { - struct { - unsigned int CU_MASK : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_PERCU_WAVE_CS { - struct { - unsigned int ACTIVE : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_PERCU_WAVE_HSGS { - struct { - unsigned int CU_USED_HS : 16; - unsigned int CU_USED_GS : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_PERCU_WAVE_VSPS { - struct { - unsigned int CU_USED_VS : 16; - unsigned int CU_USED_PS : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_PERWGP_WAVE_CS { - struct { - unsigned int ACTIVE : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_PERWGP_WAVE_HSGS { - struct { - unsigned int WGP_USED_HS : 16; - unsigned int WGP_USED_GS : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_LB_DATA_PERWGP_WAVE_PS { - struct { - unsigned int WGP_USED_PS : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_LB_DATA_PERWGP_WAVE_VSPS { - struct { - unsigned int WGP_USED_VS : 16; - unsigned int WGP_USED_PS : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_LB_DATA_REG { - struct { - unsigned int CNT_DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_DATA_WAVES { - struct { - unsigned int COUNT0 : 16; - unsigned int COUNT1 : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_LB_WGP_MASK { - struct { - unsigned int WGP_MASK : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P0_TRAP_SCREEN_GPR_MIN { - struct { - unsigned int VGPR_MIN : 6; - unsigned int SGPR_MIN : 4; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P0_TRAP_SCREEN_PSBA_HI { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P0_TRAP_SCREEN_PSBA_LO { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P0_TRAP_SCREEN_PSMA_HI { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P0_TRAP_SCREEN_PSMA_LO { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P1_TRAP_SCREEN_GPR_MIN { - struct { - unsigned int VGPR_MIN : 6; - unsigned int SGPR_MIN : 4; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P1_TRAP_SCREEN_PSBA_HI { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P1_TRAP_SCREEN_PSBA_LO { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_P1_TRAP_SCREEN_PSMA_HI { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_P1_TRAP_SCREEN_PSMA_LO { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int 
PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER2_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER3_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - 
} bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 24; - } gfx09; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 24; - } gfx09; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PERFCOUNTER_BINS { - struct { - unsigned int BIN0_MIN : 4; - unsigned int BIN0_MAX : 4; - unsigned int BIN1_MIN : 4; - unsigned int BIN1_MAX : 4; - unsigned int BIN2_MIN : 4; - unsigned int BIN2_MAX : 4; - unsigned int BIN3_MIN : 4; - unsigned int BIN3_MAX : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PG_ENABLE_STATIC_CU_MASK { - struct { - unsigned int CU_MASK : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PG_ENABLE_STATIC_WGP_MASK { - struct { - unsigned int WGP_MASK : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PQEV_CTRL { - struct { - unsigned int SCAN_PERIOD : 10; - unsigned int QUEUE_DURATION : 6; - unsigned int COMPUTE_PIPE_EN : 8; - unsigned int : 8; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_ADDR { - struct { - unsigned int PERSP_SAMPLE_ENA : 1; - unsigned int PERSP_CENTER_ENA : 1; - unsigned int PERSP_CENTROID_ENA : 1; - unsigned int PERSP_PULL_MODEL_ENA : 1; - unsigned int LINEAR_SAMPLE_ENA : 1; - unsigned int LINEAR_CENTER_ENA : 1; - unsigned int LINEAR_CENTROID_ENA : 1; - unsigned int LINE_STIPPLE_TEX_ENA : 1; - unsigned int POS_X_FLOAT_ENA : 1; - unsigned int POS_Y_FLOAT_ENA : 1; - unsigned int POS_Z_FLOAT_ENA : 1; - unsigned int POS_W_FLOAT_ENA : 1; - unsigned int FRONT_FACE_ENA : 1; - unsigned int ANCILLARY_ENA : 1; - unsigned int SAMPLE_COVERAGE_ENA : 1; - unsigned int POS_FIXED_PT_ENA : 1; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_0 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_1 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - 
unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_2 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_3 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - 
unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_4 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_5 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int 
: 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_6 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_7 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } 
gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_8 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_9 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } 
gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_10 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_11 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - 
unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_12 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_13 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_14 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_15 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_16 { - struct { - unsigned int OFFSET : 6; - 
unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_17 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_18 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int 
PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_19 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 6; - unsigned int PT_SPRITE_TEX : 1; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int PT_SPRITE_TEX_ATTR1 : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 13; - unsigned int CYL_WRAP : 4; - unsigned int : 15; - } gfx09_10; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_20 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int 
DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_21 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_22 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 
19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_23 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_24 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_25 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 
1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_26 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_27 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_28 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_29 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_30 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - 
unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_CNTL_31 { - struct { - unsigned int OFFSET : 6; - unsigned int : 2; - unsigned int DEFAULT_VAL : 2; - unsigned int FLAT_SHADE : 1; - unsigned int : 7; - unsigned int DUP : 1; - unsigned int FP16_INTERP_MODE : 1; - unsigned int USE_DEFAULT_ATTR1 : 1; - unsigned int DEFAULT_VAL_ATTR1 : 2; - unsigned int : 1; - unsigned int ATTR0_VALID : 1; - unsigned int ATTR1_VALID : 1; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 11; - unsigned int ROTATE_PC_PTR : 1; - unsigned int : 20; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int PRIM_ATTR : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_INPUT_ENA { - struct { - unsigned int PERSP_SAMPLE_ENA : 1; - unsigned int PERSP_CENTER_ENA : 1; - unsigned int PERSP_CENTROID_ENA : 1; - unsigned int PERSP_PULL_MODEL_ENA : 1; - unsigned int LINEAR_SAMPLE_ENA : 1; - unsigned int LINEAR_CENTER_ENA : 1; - unsigned int LINEAR_CENTROID_ENA : 1; - unsigned int LINE_STIPPLE_TEX_ENA : 1; - unsigned int POS_X_FLOAT_ENA : 1; - unsigned int POS_Y_FLOAT_ENA : 1; - unsigned int POS_Z_FLOAT_ENA : 1; - unsigned int POS_W_FLOAT_ENA : 1; - unsigned int FRONT_FACE_ENA : 1; - unsigned int ANCILLARY_ENA : 1; - unsigned int SAMPLE_COVERAGE_ENA : 1; - unsigned int POS_FIXED_PT_ENA : 1; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_IN_CONTROL { - 
struct { - unsigned int NUM_INTERP : 6; - unsigned int PARAM_GEN : 1; - unsigned int OFFCHIP_PARAM_EN : 1; - unsigned int LATE_PC_DEALLOC : 1; - unsigned int : 5; - unsigned int BC_OPTIMIZE_DISABLE : 1; - unsigned int : 17; - } bits, bitfields; - struct { - unsigned int : 9; - unsigned int NUM_PRIM_INTERP : 5; - unsigned int : 18; - } gfx103PlusExclusive; - struct { - unsigned int : 15; - unsigned int PS_W32_EN : 1; - unsigned int : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_PS_MAX_WAVE_ID { - struct { - unsigned int MAX_WAVE_ID : 12; - unsigned int : 4; - unsigned int MAX_COLLISION_WAVE_ID : 10; - unsigned int : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_0 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_1 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_2 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_3 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_4 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int 
LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_5 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_6 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_7 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_8 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_9 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_10 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_11 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int 
: 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_12 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_13 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_14 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_CU_15 { - struct { - unsigned int VGPR : 4; - unsigned int SGPR : 4; - unsigned int LDS : 4; - unsigned int WAVES : 3; - unsigned int BARRIERS : 4; - unsigned int : 13; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_0 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_1 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_2 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int 
QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_3 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_4 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_5 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_6 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_7 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_8 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_9 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_10 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_11 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - 
unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_12 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_13 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_14 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_RESOURCE_RESERVE_EN_CU_15 { - struct { - unsigned int EN : 1; - unsigned int TYPE_MASK : 15; - unsigned int QUEUE_MASK : 8; - unsigned int : 8; - } most; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int : 24; - unsigned int RESERVE_SPACE_ONLY : 1; - unsigned int : 7; - } gfx101; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union SPI_SHADER_COL_FORMAT { - struct { - unsigned int COL0_EXPORT_FORMAT : 4; - unsigned int COL1_EXPORT_FORMAT : 4; - unsigned int COL2_EXPORT_FORMAT : 4; - unsigned int COL3_EXPORT_FORMAT : 4; - unsigned int COL4_EXPORT_FORMAT : 4; - unsigned int COL5_EXPORT_FORMAT : 4; - unsigned int COL6_EXPORT_FORMAT : 4; - unsigned int COL7_EXPORT_FORMAT : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_SHADER_GS_MESHLET_DIM { - struct { - unsigned int MESHLET_NUM_THREAD_X : 8; - unsigned int MESHLET_NUM_THREAD_Y : 8; - unsigned int MESHLET_NUM_THREAD_Z : 8; - unsigned int MESHLET_THREADGROUP_SIZE : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SPI_SHADER_GS_MESHLET_EXP_ALLOC { - struct { - unsigned int MAX_EXP_VERTS : 9; - unsigned int MAX_EXP_PRIMS : 9; - unsigned int : 14; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SPI_SHADER_IDX_FORMAT { - struct { - unsigned int IDX0_EXPORT_FORMAT : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_LATE_ALLOC_VS { - struct { - unsigned int LIMIT : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_CHKSUM_GS { - struct { - unsigned int CHECKSUM : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_CHKSUM_HS { - struct { - unsigned int CHECKSUM : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_CHKSUM_PS { - struct { - unsigned int CHECKSUM : 32; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union SPI_SHADER_PGM_CHKSUM_VS { - struct { - unsigned int CHECKSUM : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_ES { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_ES_GS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_GS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int MEM_BASE : 32; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_HS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int MEM_BASE : 32; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_LS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_LS_HS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_PS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_HI_VS { - struct { - unsigned int MEM_BASE : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_ES { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_ES_GS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_GS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_HS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_LS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_LS_HS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_PS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_LO_VS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_ES { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int VGPR_COMP_CNT : 2; - unsigned int CU_GROUP_ENABLE : 1; - unsigned int : 3; - unsigned int CDBG_USER : 1; - unsigned int FP16_OVFL : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_GS { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int CU_GROUP_ENABLE : 1; - unsigned 
int : 3; - unsigned int CDBG_USER : 1; - unsigned int GS_VGPR_COMP_CNT : 2; - unsigned int FP16_OVFL : 1; - } bits, bitfields; - struct { - unsigned int : 25; - unsigned int MEM_ORDERED : 1; - unsigned int FWD_PROGRESS : 1; - unsigned int WGP_MODE : 1; - unsigned int : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_HS { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int : 3; - unsigned int CDBG_USER : 1; - unsigned int LS_VGPR_COMP_CNT : 2; - unsigned int FP16_OVFL : 1; - unsigned int : 1; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int MEM_ORDERED : 1; - unsigned int FWD_PROGRESS : 1; - unsigned int WGP_MODE : 1; - unsigned int : 5; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_LS { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int VGPR_COMP_CNT : 2; - unsigned int : 3; - unsigned int CDBG_USER : 1; - unsigned int FP16_OVFL : 1; - unsigned int : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_PS { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int CU_GROUP_DISABLE : 1; - unsigned int : 3; - unsigned int CDBG_USER : 1; - unsigned int FP16_OVFL : 1; - unsigned int : 2; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int LOAD_PROVOKING_VTX : 
1; - unsigned int : 4; - } gfx103PlusExclusive; - struct { - unsigned int : 25; - unsigned int MEM_ORDERED : 1; - unsigned int FWD_PROGRESS : 1; - unsigned int : 5; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC1_VS { - struct { - unsigned int VGPRS : 6; - unsigned int SGPRS : 4; - unsigned int PRIORITY : 2; - unsigned int FLOAT_MODE : 8; - unsigned int PRIV : 1; - unsigned int DX10_CLAMP : 1; - unsigned int DEBUG_MODE : 1; - unsigned int IEEE_MODE : 1; - unsigned int VGPR_COMP_CNT : 2; - unsigned int CU_GROUP_ENABLE : 1; - unsigned int : 3; - unsigned int CDBG_USER : 1; - unsigned int FP16_OVFL : 1; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int MEM_ORDERED : 1; - unsigned int FWD_PROGRESS : 1; - unsigned int : 3; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_ES { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int OC_LDS_EN : 1; - unsigned int EXCP_EN : 9; - unsigned int : 3; - unsigned int LDS_SIZE : 9; - unsigned int : 3; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_ES_GS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int OC_LDS_EN : 1; - unsigned int EXCP_EN : 9; - unsigned int : 3; - unsigned int LDS_SIZE : 9; - unsigned int : 3; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_ES_VS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int OC_LDS_EN : 1; - unsigned int EXCP_EN : 9; - unsigned int : 3; - unsigned int LDS_SIZE : 9; - unsigned int : 3; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_GS { - struct { - unsigned int SCRATCH_EN : 1; - 
unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int EXCP_EN : 9; - unsigned int ES_VGPR_COMP_CNT : 2; - unsigned int OC_LDS_EN : 1; - unsigned int LDS_SIZE : 8; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int SKIP_USGPR0 : 1; - unsigned int USER_SGPR_MSB : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 27; - unsigned int USER_SGPR_MSB : 1; - unsigned int SHARED_VGPR_CNT : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_GS_VS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int EXCP_EN : 9; - unsigned int VGPR_COMP_CNT : 2; - unsigned int OC_LDS_EN : 1; - unsigned int LDS_SIZE : 8; - unsigned int SKIP_USGPR0 : 1; - unsigned int USER_SGPR_MSB : 1; - unsigned int : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_HS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 7; - unsigned int EXCP_EN : 9; - unsigned int LDS_SIZE : 9; - unsigned int : 2; - unsigned int SKIP_USGPR0 : 1; - unsigned int USER_SGPR_MSB : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 7; - unsigned int OC_LDS_EN : 1; - unsigned int TG_SIZE_EN : 1; - unsigned int EXCP_EN : 9; - unsigned int LDS_SIZE : 9; - unsigned int USER_SGPR_MSB : 1; - unsigned int SHARED_VGPR_CNT : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_LS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int LDS_SIZE : 9; - unsigned int EXCP_EN : 9; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_LS_ES { - 
struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int LDS_SIZE : 9; - unsigned int EXCP_EN : 9; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_LS_HS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int LDS_SIZE : 9; - unsigned int EXCP_EN : 9; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_LS_VS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int LDS_SIZE : 9; - unsigned int EXCP_EN : 9; - unsigned int : 7; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_PS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int WAVE_CNT_EN : 1; - unsigned int EXTRA_LDS_SIZE : 8; - unsigned int EXCP_EN : 9; - unsigned int LOAD_COLLISION_WAVEID : 1; - unsigned int LOAD_INTRAWAVE_COLLISION : 1; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int SKIP_USGPR0 : 1; - unsigned int USER_SGPR_MSB : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 27; - unsigned int USER_SGPR_MSB : 1; - unsigned int SHARED_VGPR_CNT : 4; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC2_VS { - struct { - unsigned int SCRATCH_EN : 1; - unsigned int USER_SGPR : 5; - unsigned int TRAP_PRESENT : 1; - unsigned int OC_LDS_EN : 1; - unsigned int SO_BASE0_EN : 1; - unsigned int SO_BASE1_EN : 1; - unsigned int SO_BASE2_EN : 1; - unsigned int SO_BASE3_EN : 1; - unsigned int SO_EN : 1; - unsigned int EXCP_EN : 9; - unsigned int PC_BASE_EN : 1; - unsigned int : 9; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int 
DISPATCH_DRAW_EN : 1; - unsigned int : 7; - } most; - struct { - unsigned int : 27; - unsigned int SKIP_USGPR0 : 1; - unsigned int USER_SGPR_MSB : 1; - unsigned int : 3; - } gfx09; - struct { - unsigned int : 27; - unsigned int USER_SGPR_MSB : 1; - unsigned int SHARED_VGPR_CNT : 4; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_ES { - struct { - unsigned int CU_EN : 16; - unsigned int WAVE_LIMIT : 6; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int GROUP_FIFO_DEPTH : 6; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_GS { - struct { - unsigned int CU_EN : 16; - unsigned int WAVE_LIMIT : 6; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int SIMD_DISABLE : 4; - unsigned int : 2; - } gfx09; - struct { - unsigned int : 26; - unsigned int GROUP_FIFO_DEPTH : 6; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_HS { - struct { - unsigned int WAVE_LIMIT : 6; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int : 6; - unsigned int CU_EN : 16; - } bits, bitfields; - struct { - unsigned int : 10; - unsigned int SIMD_DISABLE : 4; - unsigned int : 18; - } gfx09; - struct { - unsigned int : 10; - unsigned int GROUP_FIFO_DEPTH : 6; - unsigned int : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_LS { - struct { - unsigned int CU_EN : 16; - unsigned int WAVE_LIMIT : 6; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int GROUP_FIFO_DEPTH : 6; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_PS { - struct { - unsigned int CU_EN : 16; - unsigned int WAVE_LIMIT : 6; - unsigned int : 10; - } bits, bitfields; - struct { - unsigned int : 22; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int : 6; - } 
most; - struct { - unsigned int : 26; - unsigned int SIMD_DISABLE : 4; - unsigned int : 2; - } gfx09; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 22; - unsigned int LDS_GROUP_SIZE : 2; - unsigned int : 8; - } gfx104Plus; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC3_VS { - struct { - unsigned int CU_EN : 16; - unsigned int WAVE_LIMIT : 6; - unsigned int LOCK_LOW_THRESHOLD : 4; - unsigned int : 6; - } bits, bitfields; - struct { - unsigned int : 26; - unsigned int SIMD_DISABLE : 4; - unsigned int : 2; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC4_GS { - struct { - unsigned int GROUP_FIFO_DEPTH : 7; - unsigned int SPI_SHADER_LATE_ALLOC_GS : 7; - unsigned int : 18; - } gfx09; - struct { - unsigned int CU_EN : 16; - unsigned int : 16; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 23; - unsigned int INST_PREF_SIZE : 6; - unsigned int : 3; - } gfx104Plus; -#endif - struct { - unsigned int : 16; - unsigned int SPI_SHADER_LATE_ALLOC_GS : 7; - unsigned int : 9; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int CU_EN : 1; - unsigned int RESERVED : 13; - unsigned int PH_THROTTLE_EN : 1; - unsigned int SPI_THROTTLE_EN : 1; - unsigned int : 13; - unsigned int TRAP_ON_START : 1; - unsigned int TRAP_ON_END : 1; - unsigned int IMAGE_OP : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC4_HS { - struct { - unsigned int GROUP_FIFO_DEPTH : 7; - unsigned int : 25; - } gfx09; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 16; - unsigned int INST_PREF_SIZE : 6; - unsigned int : 10; - } gfx104Plus; -#endif - struct { - unsigned int CU_EN : 16; - 
unsigned int : 16; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 29; - unsigned int TRAP_ON_START : 1; - unsigned int TRAP_ON_END : 1; - unsigned int IMAGE_OP : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC4_PS { - struct { - unsigned int CU_EN : 16; - unsigned int : 16; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 16; - unsigned int INST_PREF_SIZE : 6; - unsigned int : 10; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 29; - unsigned int TRAP_ON_START : 1; - unsigned int TRAP_ON_END : 1; - unsigned int IMAGE_OP : 1; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PGM_RSRC4_VS { - struct { - unsigned int CU_EN : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_POS_FORMAT { - struct { - unsigned int POS0_EXPORT_FORMAT : 4; - unsigned int POS1_EXPORT_FORMAT : 4; - unsigned int POS2_EXPORT_FORMAT : 4; - unsigned int POS3_EXPORT_FORMAT : 4; - unsigned int : 16; - } bits, bitfields; - struct { - unsigned int : 16; - unsigned int POS4_EXPORT_FORMAT : 4; - unsigned int : 12; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_ESGS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_SHADER_PREF_PRI_ACCUM_ESGS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_ESGS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_ESGS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_LSHS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_LSHS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned 
int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_LSHS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_LSHS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_PS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_PS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int 
: 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_PS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_PS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_VS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_VS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_VS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int 
COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_ACCUM_VS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int COEFFICIENT_HIER_SELECT : 3; - unsigned int CONTRIBUTION_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int COEFFICIENT : 8; - unsigned int : 9; - } most; - struct { - unsigned int : 14; - unsigned int RESERVED : 1; - unsigned int : 17; - } gfx101; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_CNTR_CTRL_ESGS { - struct { - unsigned int TOTAL_WAVE_COUNT_HIER_SELECT : 3; - unsigned int PER_TYPE_WAVE_COUNT_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int TOTAL_WAVE_COUNT_COEFFICIENT : 8; - unsigned int PER_TYPE_WAVE_COUNT_COEFFICIENT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_CNTR_CTRL_LSHS { - struct { - unsigned int TOTAL_WAVE_COUNT_HIER_SELECT : 3; - unsigned int PER_TYPE_WAVE_COUNT_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int TOTAL_WAVE_COUNT_COEFFICIENT : 8; - unsigned int PER_TYPE_WAVE_COUNT_COEFFICIENT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_CNTR_CTRL_PS { - struct { - unsigned int TOTAL_WAVE_COUNT_HIER_SELECT : 3; - unsigned int PER_TYPE_WAVE_COUNT_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int TOTAL_WAVE_COUNT_COEFFICIENT : 8; - unsigned int PER_TYPE_WAVE_COUNT_COEFFICIENT : 8; - unsigned int : 8; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_PREF_PRI_CNTR_CTRL_VS { - struct { - unsigned int TOTAL_WAVE_COUNT_HIER_SELECT : 3; - unsigned int PER_TYPE_WAVE_COUNT_HIER_SELECT : 3; - unsigned int GROUP_UPDATE_EN : 1; - unsigned int : 1; - unsigned int TOTAL_WAVE_COUNT_COEFFICIENT : 8; - unsigned int PER_TYPE_WAVE_COUNT_COEFFICIENT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_REQ_CTRL_ESGS { - struct { - unsigned int SOFT_GROUPING_EN : 1; - unsigned int NUMBER_OF_REQUESTS_PER_CU : 4; - unsigned int SOFT_GROUPING_ALLOCATION_TIMEOUT : 4; - unsigned int HARD_LOCK_HYSTERESIS : 1; - unsigned int HARD_LOCK_LOW_THRESHOLD : 5; - unsigned int PRODUCER_REQUEST_LOCKOUT : 1; - unsigned int GLOBAL_SCANNING_EN : 1; - unsigned int ALLOCATION_RATE_THROTTLING_THRESHOLD : 3; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_REQ_CTRL_LSHS { - struct { - unsigned int SOFT_GROUPING_EN : 1; - unsigned int NUMBER_OF_REQUESTS_PER_CU : 4; - unsigned int SOFT_GROUPING_ALLOCATION_TIMEOUT : 4; - unsigned int HARD_LOCK_HYSTERESIS : 1; - unsigned int HARD_LOCK_LOW_THRESHOLD : 5; - unsigned int PRODUCER_REQUEST_LOCKOUT : 1; - unsigned int GLOBAL_SCANNING_EN : 1; - unsigned int ALLOCATION_RATE_THROTTLING_THRESHOLD : 3; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_REQ_CTRL_PS { - struct { - unsigned int SOFT_GROUPING_EN : 1; - unsigned int NUMBER_OF_REQUESTS_PER_CU : 4; - unsigned int SOFT_GROUPING_ALLOCATION_TIMEOUT : 4; - unsigned int HARD_LOCK_HYSTERESIS : 1; - unsigned int HARD_LOCK_LOW_THRESHOLD : 5; - unsigned int PRODUCER_REQUEST_LOCKOUT : 1; - unsigned int GLOBAL_SCANNING_EN : 1; - unsigned int ALLOCATION_RATE_THROTTLING_THRESHOLD : 3; - unsigned int : 12; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_REQ_CTRL_VS { - struct { - unsigned int SOFT_GROUPING_EN : 1; - unsigned int NUMBER_OF_REQUESTS_PER_CU : 4; - unsigned int SOFT_GROUPING_ALLOCATION_TIMEOUT : 4; - unsigned int HARD_LOCK_HYSTERESIS : 1; - unsigned int HARD_LOCK_LOW_THRESHOLD : 5; - unsigned int PRODUCER_REQUEST_LOCKOUT : 1; - unsigned int GLOBAL_SCANNING_EN : 1; - unsigned int ALLOCATION_RATE_THROTTLING_THRESHOLD : 3; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_RSRC_LIMIT_CTRL { - struct { - unsigned int WAVES_PER_SIMD32 : 5; - unsigned int VGPR_PER_SIMD32 : 7; - unsigned int VGPR_WRAP_DISABLE : 1; - unsigned int BARRIER_LIMIT : 6; - unsigned int BARRIER_LIMIT_HIERARCHY_LEVEL : 1; - unsigned int LDS_LIMIT : 8; - unsigned int LDS_LIMIT_HIERARCHY_LEVEL : 1; - unsigned int : 2; - unsigned int PERFORMANCE_LIMIT_ENABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_ESGS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_ESGS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_ESGS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_ESGS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_LSHS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union SPI_SHADER_USER_ACCUM_LSHS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_LSHS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_LSHS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_PS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_PS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_PS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_PS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_VS_0 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_VS_1 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_VS_2 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_ACCUM_VS_3 { - struct { - unsigned int CONTRIBUTION : 7; - unsigned 
int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ADDR_HI_GS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ADDR_HI_HS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ADDR_LO_GS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ADDR_LO_HS { - struct { - unsigned int MEM_BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_7 { - struct { - unsigned 
int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_9 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_14 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_19 { - struct { - unsigned int DATA 
: 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_COMMON_31 { - struct { - unsigned int DATA : 32; - 
} bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_0 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_1 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_2 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_3 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_4 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_5 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_6 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_7 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_8 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_9 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_10 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_11 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_12 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_13 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_14 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_15 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_25 { - struct { 
- unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_ES_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_9 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_14 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union SPI_SHADER_USER_DATA_GS_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_30 { - struct { - 
unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_GS_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_9 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_14 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union SPI_SHADER_USER_DATA_HS_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_HS_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_0 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_1 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_2 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_3 { - struct { - unsigned int DATA : 32; - } most; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_4 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_5 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_6 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_7 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_8 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_9 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_10 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_11 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_12 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_13 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_14 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_15 { - struct { - unsigned int DATA : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed 
int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_SHADER_USER_DATA_LS_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_LS_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_9 { - struct { - unsigned int DATA : 32; 
- } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_14 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed 
int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_PS_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_SHADER_USER_DATA_VS_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_8 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_9 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_10 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_11 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_12 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_13 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_14 { - struct { - unsigned int DATA : 
32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_15 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_16 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_17 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_18 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_19 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_20 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_21 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_22 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_23 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_24 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_25 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_26 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_27 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_28 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_29 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_30 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_USER_DATA_VS_31 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SHADER_Z_FORMAT { - struct { - unsigned int Z_EXPORT_FORMAT : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_START_PHASE { - struct { - unsigned int VGPR_START_PHASE : 2; - unsigned int SGPR_START_PHASE : 2; - unsigned int WAVE_START_PHASE : 2; - unsigned int : 26; - } gfx09; - struct { - unsigned int PC_X_PHASE_SE0 : 2; - unsigned int PC_X_PHASE_SE1 : 2; - unsigned int PC_X_PHASE_SE2 : 2; - unsigned int PC_X_PHASE_SE3 : 2; - unsigned int : 24; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SX_EXPORT_BUFFER_SIZES { - struct { - unsigned int COLOR_BUFFER_SIZE : 16; - unsigned int POSITION_BUFFER_SIZE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SX_SCOREBOARD_BUFFER_SIZES { - struct { - unsigned int COLOR_SCOREBOARD_SIZE : 16; - unsigned int POSITION_SCOREBOARD_SIZE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SYS_COMPUTE { - struct { - unsigned int PIPE : 8; - unsigned int : 24; - } 
most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_SYS_WIF_CNTL { - struct { - unsigned int THRESHOLD : 8; - unsigned int : 24; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_TMPRING_SIZE { - struct { - unsigned int WAVES : 12; - unsigned int : 20; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int WAVESIZE : 13; - unsigned int : 7; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int WAVESIZE : 15; - unsigned int : 5; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_USER_ACCUM_VMID_CNTL { - struct { - unsigned int EN_USER_ACCUM : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_VS_OUT_CONFIG { - struct { - unsigned int : 1; - unsigned int VS_EXPORT_COUNT : 5; - unsigned int : 26; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int VS_HALF_PACK : 1; - unsigned int : 25; - } gfx09_10; - struct { - unsigned int : 8; - unsigned int PRIM_EXPORT_COUNT : 5; - unsigned int : 19; - } gfx103PlusExclusive; - struct { - unsigned int : 7; - unsigned int NO_PC_EXPORT : 1; - unsigned int : 24; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WAVE_LIMIT_CNTL { - struct { - unsigned int PS_WAVE_GRAN : 2; - unsigned int : 2; - unsigned int GS_WAVE_GRAN : 2; - unsigned int HS_WAVE_GRAN : 2; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 2; - unsigned int VS_WAVE_GRAN : 2; - unsigned int : 28; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WAVE_LIMIT_CNTL_REMAP { - struct { - unsigned int RESERVED : 32; - } gfx101; -#if CHIP_HDR_NAVI21 - struct { - unsigned int RESERVED : 32; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int RESERVED : 32; - } nv22; 
-#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int RESERVED : 32; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int RESERVED : 32; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS0 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS1 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS2 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS3 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS4 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS5 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS6 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_CS7 { - struct { - unsigned int VALUE : 7; - unsigned int : 25; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_GFX { - struct { - unsigned int VALUE : 7; - unsigned int : 5; - unsigned int HS_GRP_VALUE : 5; - unsigned int : 5; - unsigned int GS_GRP_VALUE : 5; - unsigned int : 5; - } bits, bitfields; - struct { - unsigned int : 7; - unsigned int LS_GRP_VALUE : 5; - unsigned int : 5; - unsigned int ES_GRP_VALUE : 5; - unsigned int : 10; - } 
most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WCL_PIPE_PERCENT_HP3D { - struct { - unsigned int VALUE : 7; - unsigned int : 5; - unsigned int HS_GRP_VALUE : 5; - unsigned int : 5; - unsigned int GS_GRP_VALUE : 5; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_ACTIVE_COUNT_GFX { - struct { - unsigned int WF_ALLOCATED : 8; - unsigned int WF_ACTIVE : 16; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_ACTIVE_COUNT_HPG { - struct { - unsigned int WF_ALLOCATED : 8; - unsigned int WF_ACTIVE : 16; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_CNTL { - struct { - unsigned int SAMPLE_PERIOD : 4; - unsigned int EN : 1; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_0 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_1 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_2 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_3 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_4 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_5 { - struct { - 
unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_6 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_7 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_8 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_LIMIT_9 { - struct { - unsigned int MAX_CNT : 31; - unsigned int EN_WARN : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_0 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_1 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_2 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_3 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_4 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_5 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SPI_WF_LIFETIME_STATUS_6 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_7 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_8 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_9 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_10 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_11 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_12 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_13 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_14 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_15 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_16 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 
1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_17 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_18 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_19 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_20 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SPI_WF_LIFETIME_STATUS_21 { - struct { - unsigned int MAX_CNT : 31; - unsigned int INT_SENT : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQG_CONFIG { - struct { - unsigned int UTCL0_PREFETCH_PAGE : 4; - unsigned int UTCL0_RETRY_TIMER : 7; - unsigned int : 21; - } gfx10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 13; - unsigned int SQG_ICPFT_EN : 1; - unsigned int SQG_ICPFT_CLR : 1; - unsigned int : 17; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int GL1H_PREFETCH_PAGE : 4; - unsigned int : 12; - unsigned int XNACK_INTR_MASK : 16; - } gfx11; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 12; - unsigned int SQG_WAVEDONE_FIFO_DEPTH : 1; - unsigned int : 19; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 12; - unsigned int SQG_WAVEDONE_FIFO_DEPTH : 1; - unsigned int : 19; - } nv24; -#endif - struct { - unsigned int : 12; - unsigned int SQG_WAVEDONE_FIFO_DEPTH : 1; - unsigned int : 19; - } 
raphael; - struct { - unsigned int : 12; - unsigned int SQG_WAVEDONE_FIFO_DEPTH : 1; - unsigned int : 19; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_GL1H_STATUS { - struct { - unsigned int R0_ACK_ERR_DETECTED : 1; - unsigned int R0_XNACK_ERR_DETECTED : 1; - unsigned int R1_ACK_ERR_DETECTED : 1; - unsigned int R1_XNACK_ERR_DETECTED : 1; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| 
CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if 
CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER6_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| 
CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER6_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER6_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER7_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER7_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER7_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER_CTRL { - struct { - unsigned int PS_EN : 1; - unsigned int : 1; - unsigned int GS_EN : 1; - unsigned int : 1; - unsigned int HS_EN : 1; - unsigned int : 1; - unsigned int CS_EN : 1; - unsigned int : 7; - unsigned int DISABLE_ME0PIPE0_PERF : 1; - unsigned int DISABLE_ME0PIPE1_PERF : 1; - unsigned int DISABLE_ME1PIPE0_PERF : 1; - unsigned int DISABLE_ME1PIPE1_PERF : 1; - unsigned int DISABLE_ME1PIPE2_PERF : 1; - unsigned int DISABLE_ME1PIPE3_PERF : 1; - unsigned int : 12; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERFCOUNTER_CTRL2 { - struct { - unsigned int FORCE_EN : 1; - unsigned int VMID_EN : 16; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQG_PERF_SAMPLE_FINISH { - struct { - unsigned int STATUS : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SQG_STATUS { - struct { - unsigned int REG_BUSY : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQG_UTCL0_CNTL1 { - struct { - unsigned int FORCE_4K_L2_RESP : 1; - unsigned int GPUVM_64K_DEF : 1; - unsigned int GPUVM_PERM_MODE : 1; - unsigned int RESP_MODE : 2; - unsigned int RESP_FAULT_MODE : 2; - unsigned int CLIENTID : 9; - unsigned int RESERVED : 1; - unsigned int ENABLE_PUSH_LFIFO : 1; - unsigned int ENABLE_LFIFO_PRI_ARB : 1; - unsigned int REG_INV_VMID : 4; - unsigned int REG_INV_ALL_VMID : 1; - unsigned int REG_INV_TOGGLE : 1; - unsigned int CLIENT_INVALIDATE_ALL_VMID : 1; - unsigned int FORCE_MISS : 1; - unsigned int FORCE_IN_ORDER : 1; - unsigned int REDUCE_FIFO_DEPTH_BY_2 : 2; - unsigned int REDUCE_CACHE_SIZE_BY_2 : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQG_UTCL0_CNTL2 { - struct { - unsigned int SPARE : 8; - unsigned int LFIFO_SCAN_DISABLE : 1; - unsigned int MTYPE_OVRD_DIS : 1; - unsigned int LINE_VALID : 1; - unsigned int DIS_EDC : 1; - unsigned int GPUVM_INV_MODE : 1; - unsigned int SHOOTDOWN_OPT : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int FORCE_GPUVM_INV_ACK : 1; - unsigned int ARB_BURST_MODE : 2; - unsigned int ENABLE_PERF_EVENT_RD_WR : 1; - unsigned int PERF_EVENT_RD_WR : 1; - unsigned int 
ENABLE_PERF_EVENT_VMID : 1; - unsigned int PERF_EVENT_VMID : 4; - unsigned int DIS_DUAL_L2_REQ : 1; - unsigned int FORCE_FRAG_2M_TO_64K : 1; - unsigned int PERM_MODE_OVRD : 1; - unsigned int LINE_INVALIDATE_OPT : 1; - unsigned int GPUVM_16K_DEF : 1; - unsigned int RESERVED : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQG_UTCL0_STATUS { - struct { - unsigned int FAULT_DETECTED : 1; - unsigned int RETRY_DETECTED : 1; - unsigned int PRT_DETECTED : 1; - unsigned int RESERVED : 5; - unsigned int UNUSED : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_ALU_CLK_CTRL { - struct { - unsigned int FORCE_CU_ON_SH0 : 16; - unsigned int FORCE_CU_ON_SH1 : 16; - } gfx09; - struct { - unsigned int FORCE_WGP_ON_SA0 : 16; - unsigned int FORCE_WGP_ON_SA1 : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_ARB_CONFIG { - struct { - unsigned int WG_RR_INTERVAL : 2; - unsigned int : 2; - unsigned int FWD_PROG_INTERVAL : 2; - unsigned int : 26; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int DISABLE_SECOND_TRY : 1; - unsigned int : 23; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_BUF_RSRC_WORD0 { - struct { - unsigned int BASE_ADDRESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_BUF_RSRC_WORD1 { - struct { - unsigned int BASE_ADDRESS_HI : 16; - unsigned int STRIDE : 14; - unsigned int CACHE_SWIZZLE : 1; - unsigned int SWIZZLE_ENABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_BUF_RSRC_WORD2 { - struct { - unsigned int NUM_RECORDS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_BUF_RSRC_WORD3 { - struct { - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - 
unsigned int DST_SEL_W : 3; - unsigned int NUM_FORMAT : 3; - unsigned int DATA_FORMAT : 4; - unsigned int USER_VM_ENABLE : 1; - unsigned int USER_VM_MODE : 1; - unsigned int INDEX_STRIDE : 2; - unsigned int ADD_TID_ENABLE : 1; - unsigned int : 3; - unsigned int NV : 1; - unsigned int : 2; - unsigned int TYPE : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_CLK_CTRL { - struct { - unsigned int : 2; - unsigned int SQ_SPI_MSG_FGCG_OVERRIDE : 1; - unsigned int SQ_SPI_EXPREQ_FGCG_OVERRIDE : 1; - unsigned int SQ_SX_EXPCMD_FGCG_OVERRIDE : 1; - unsigned int SQ_SQC_TTRACE_FGCG_OVERRIDE : 1; - unsigned int WCLK_OVERRIDE : 1; - unsigned int PERFMON_OVERRIDE : 1; - unsigned int OVERRIDE_LDS_IDX_BUSY : 1; - unsigned int OVERRIDE_LDS_DIRECT_BUSY : 1; - unsigned int WCLK_SLEEP_VMEM_OVERRIDE : 1; - unsigned int WCLK_SLEEP_EXPALLOC_OVERRIDE : 1; - unsigned int : 20; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int PIPE2DCLK_OVERRIDE : 1; - unsigned int : 30; - } gfx103; - struct { - unsigned int WCLK2DCLK_OVERRIDE : 1; - unsigned int : 31; - } gfx103Derivative; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 16; - unsigned int SQ_LDS_DIRECT_FGCG_OVERRIDE : 1; - unsigned int : 15; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int SQ_SP_CMD_FGCG_OVERRIDE : 1; - unsigned int SQ_SP_CONST_FGCG_OVERRIDE : 1; - unsigned int SQ_SP_EXP_FGCG_OVERRIDE : 1; - unsigned int SQ_SP_VMEM_FGCG_OVERRIDE : 1; - unsigned int : 16; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_CMD { - struct { - unsigned int : 4; - unsigned int MODE : 3; - unsigned int CHECK_VMID : 1; - unsigned int DATA : 4; - unsigned int : 12; - unsigned int QUEUE_ID : 3; - unsigned int : 1; - unsigned int VM_ID : 4; - } bits, bitfields; - struct { - unsigned 
int CMD : 3; - unsigned int : 13; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int : 10; - } gfx09; - struct { - unsigned int CMD : 4; - unsigned int : 12; - unsigned int WAVE_ID : 5; - unsigned int : 11; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_CMD_TIMESTAMP { - struct { - unsigned int TIMESTAMP : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_CONFIG { - struct { - unsigned int : 9; - unsigned int DEBUG_SINGLE_MEMOP : 1; - unsigned int DEBUG_ONE_INST_CLAUSE : 1; - unsigned int OVERRIDE_LDS_IDX_BUSY : 1; - unsigned int : 9; - unsigned int REPLAY_SLEEP_CNT : 7; - unsigned int : 4; - } most; - struct { - unsigned int : 7; - unsigned int OVERRIDE_ALU_BUSY : 1; - unsigned int DEBUG_EN : 1; - unsigned int : 3; - unsigned int EARLY_TA_DONE_DISABLE : 1; - unsigned int DUA_FLAT_LOCK_ENABLE : 1; - unsigned int DUA_LDS_BYPASS_DISABLE : 1; - unsigned int DUA_FLAT_LDS_PINGPONG_DISABLE : 1; - unsigned int DISABLE_VMEM_SOFT_CLAUSE : 1; - unsigned int DISABLE_SMEM_SOFT_CLAUSE : 1; - unsigned int : 10; - unsigned int DISABLE_SP_VGPR_WRITE_SKIP : 1; - unsigned int : 1; - unsigned int DISABLE_FLAT_SOFT_CLAUSE : 1; - unsigned int DISABLE_MIMG_SOFT_CLAUSE : 1; - } gfx09; - struct { - unsigned int : 18; - unsigned int ENABLE_HIPRIO_ON_EXP_RDY_VS : 1; - unsigned int PRIO_VAL_ON_EXP_RDY_VS : 2; - unsigned int : 8; - unsigned int DISABLE_SP_REDUNDANT_THREAD_GATING : 1; - unsigned int : 2; - } gfx09_10; - struct { - unsigned int : 12; - unsigned int VGPR_SWIZZLE_EN : 1; - unsigned int LDS_BUSY_HYSTERESIS_CNT : 2; - unsigned int SP_BUSY_HYSTERESIS_CNT : 2; - unsigned int : 13; - unsigned int TA_BUSY_HYSTERESIS_CNT : 2; - } gfx10; - struct { - unsigned int UNUSED : 7; - unsigned int : 25; - } gfx101; - struct { - unsigned int UNUSED : 5; - unsigned int CHICKEN_BIT_DEGGIGXX0_8637 : 1; - unsigned int UNUSED_6 : 1; - unsigned int : 25; - } 
gfx103Derivative; - struct { - unsigned int : 10; - unsigned int DISABLE_SGPR_RD_KILL : 1; - unsigned int : 10; - unsigned int WCLK_HYSTERESIS_CNT : 2; - unsigned int : 9; - } gfx103PlusExclusive; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int DISABLE_VMEM_EXEC_ZERO_SKIP : 1; - unsigned int : 17; - unsigned int DISABLE_END_CLAUSE_TX : 1; - unsigned int : 4; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int ECO_SPARE : 8; - unsigned int NEW_TRANS_ARB_SCHEME : 1; - unsigned int : 9; - unsigned int ENABLE_HIPRIO_ON_EXP_RDY_GS : 1; - unsigned int PRIO_VAL_ON_EXP_RDY_GS : 2; - unsigned int : 11; - } gfx11; -#endif - struct { - unsigned int UNUSED : 7; - unsigned int : 25; - } vg10_Vg12_Rv1x_Rv2x; - struct { - unsigned int DISABLE_BARRIER_WAITCNT : 1; - unsigned int UNUSED : 6; - unsigned int : 25; - } vg20_Rn; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_DSM_CNTL { - struct { - unsigned int WAVEFRONT_STALL_0 : 1; - unsigned int WAVEFRONT_STALL_1 : 1; - unsigned int SPI_BACKPRESSURE_0 : 1; - unsigned int SPI_BACKPRESSURE_1 : 1; - unsigned int : 4; - unsigned int SEL_DSM_SGPR_IRRITATOR_DATA0 : 1; - unsigned int SEL_DSM_SGPR_IRRITATOR_DATA1 : 1; - unsigned int SGPR_ENABLE_SINGLE_WRITE : 1; - unsigned int : 5; - unsigned int SEL_DSM_LDS_IRRITATOR_DATA0 : 1; - unsigned int SEL_DSM_LDS_IRRITATOR_DATA1 : 1; - unsigned int LDS_ENABLE_SINGLE_WRITE01 : 1; - unsigned int SEL_DSM_LDS_IRRITATOR_DATA2 : 1; - unsigned int SEL_DSM_LDS_IRRITATOR_DATA3 : 1; - unsigned int LDS_ENABLE_SINGLE_WRITE23 : 1; - unsigned int : 2; - unsigned int SEL_DSM_SP_IRRITATOR_DATA0 : 1; - unsigned int SEL_DSM_SP_IRRITATOR_DATA1 : 1; - unsigned int SP_ENABLE_SINGLE_WRITE : 1; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_DSM_CNTL2 { - struct { - 
unsigned int SGPR_ENABLE_ERROR_INJECT : 2; - unsigned int SGPR_SELECT_INJECT_DELAY : 1; - unsigned int LDS_D_ENABLE_ERROR_INJECT : 2; - unsigned int LDS_D_SELECT_INJECT_DELAY : 1; - unsigned int LDS_I_ENABLE_ERROR_INJECT : 2; - unsigned int LDS_I_SELECT_INJECT_DELAY : 1; - unsigned int SP_ENABLE_ERROR_INJECT : 2; - unsigned int SP_SELECT_INJECT_DELAY : 1; - unsigned int : 2; - unsigned int LDS_INJECT_DELAY : 6; - unsigned int SP_INJECT_DELAY : 6; - unsigned int SQ_INJECT_DELAY : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_DS_0 { - struct { - unsigned int OFFSET0 : 8; - unsigned int OFFSET1 : 8; - unsigned int GDS : 1; - unsigned int OP : 8; - unsigned int : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_DS_1 { - struct { - unsigned int ADDR : 8; - unsigned int DATA0 : 8; - unsigned int DATA1 : 8; - unsigned int VDST : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EDC_CNT { - struct { - unsigned int LDS_D_SEC_COUNT : 2; - unsigned int LDS_D_DED_COUNT : 2; - unsigned int LDS_I_SEC_COUNT : 2; - unsigned int LDS_I_DED_COUNT : 2; - unsigned int SGPR_SEC_COUNT : 2; - unsigned int SGPR_DED_COUNT : 2; - unsigned int VGPR0_SEC_COUNT : 2; - unsigned int VGPR0_DED_COUNT : 2; - unsigned int VGPR1_SEC_COUNT : 2; - unsigned int VGPR1_DED_COUNT : 2; - unsigned int VGPR2_SEC_COUNT : 2; - unsigned int VGPR2_DED_COUNT : 2; - unsigned int VGPR3_SEC_COUNT : 2; - unsigned int VGPR3_DED_COUNT : 2; - unsigned int : 4; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EDC_DED_CNT { - struct { - unsigned int LDS_DED : 8; - unsigned int SGPR_DED : 8; - unsigned int VGPR_DED : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EDC_FUE_CNTL { - struct { - unsigned int BLOCK_FUE_FLAGS : 16; - 
unsigned int FUE_INTERRUPT_ENABLES : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EDC_INFO { - struct { - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int SOURCE : 3; - unsigned int VM_ID : 4; - unsigned int : 19; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EDC_SEC_CNT { - struct { - unsigned int LDS_SEC : 8; - unsigned int SGPR_SEC : 8; - unsigned int VGPR_SEC : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EXP_0 { - struct { - unsigned int EN : 4; - unsigned int TGT : 6; - unsigned int COMPR : 1; - unsigned int DONE : 1; - unsigned int VM : 1; - unsigned int : 13; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_EXP_1 { - struct { - unsigned int VSRC0 : 8; - unsigned int VSRC1 : 8; - unsigned int VSRC2 : 8; - unsigned int VSRC3 : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_FIFO_SIZES { - struct { - unsigned int INTERRUPT_FIFO_SIZE : 4; - unsigned int : 14; - unsigned int VMEM_DATA_FIFO_SIZE : 2; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int TTRACE_FIFO_SIZE : 4; - unsigned int : 4; - unsigned int EXPORT_BUF_SIZE : 2; - unsigned int : 14; - } gfx09; - struct { - unsigned int : 12; - unsigned int EXPORT_BUF_VS_RESERVED : 2; - unsigned int : 18; - } gfx10; - struct { - unsigned int : 8; - unsigned int TTRACE_FIFO_SIZE : 2; - unsigned int : 4; - unsigned int EXPORT_BUF_PS_RESERVED : 2; - unsigned int EXPORT_BUF_REDUCE : 2; - unsigned int : 14; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 12; - unsigned int EXPORT_BUF_GS_RESERVED : 2; - unsigned int : 6; - unsigned int EXPORT_BUF_PRIMPOS_LIMIT : 2; - unsigned int : 10; - } 
gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_FLAT_0 { - struct { - unsigned int OFFSET : 12; - unsigned int : 1; - unsigned int LDS : 1; - unsigned int SEG : 2; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_FLAT_1 { - struct { - unsigned int ADDR : 8; - unsigned int DATA : 8; - unsigned int SADDR : 7; - unsigned int NV : 1; - unsigned int VDST : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_FLAT_SCRATCH_WORD0 { - struct { - unsigned int SIZE : 19; - unsigned int : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_FLAT_SCRATCH_WORD1 { - struct { - unsigned int OFFSET : 24; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_GLBL_0 { - struct { - unsigned int OFFSET : 13; - unsigned int LDS : 1; - unsigned int SEG : 2; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_GLBL_1 { - struct { - unsigned int ADDR : 8; - unsigned int DATA : 8; - unsigned int SADDR : 7; - unsigned int NV : 1; - unsigned int VDST : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD0 { - struct { - unsigned int BASE_ADDRESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD1 { - struct { - unsigned int BASE_ADDRESS_HI : 8; - unsigned int MIN_LOD : 12; - unsigned int DATA_FORMAT : 6; - unsigned int NUM_FORMAT : 4; - unsigned int NV : 1; - unsigned int META_DIRECT : 1; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD2 { - struct { - unsigned int WIDTH : 14; - unsigned int HEIGHT : 14; - unsigned int PERF_MOD : 3; - unsigned int : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD3 { - struct { - unsigned int DST_SEL_X : 3; - unsigned int DST_SEL_Y : 3; - unsigned int DST_SEL_Z : 3; - unsigned int DST_SEL_W : 3; - unsigned int BASE_LEVEL : 4; - unsigned int LAST_LEVEL : 4; - unsigned int SW_MODE : 5; - unsigned int : 3; - unsigned int TYPE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD4 { - struct { - unsigned int DEPTH : 13; - unsigned int PITCH : 16; - unsigned int BC_SWIZZLE : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD5 { - struct { - unsigned int BASE_ARRAY : 13; - unsigned int ARRAY_PITCH : 4; - unsigned int META_DATA_ADDRESS : 8; - unsigned int META_LINEAR : 1; - unsigned int META_PIPE_ALIGNED : 1; - unsigned int META_RB_ALIGNED : 1; - unsigned int MAX_MIP : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD6 { - struct { - unsigned int MIN_LOD_WARN : 12; - unsigned int COUNTER_BANK_ID : 8; - unsigned int LOD_HDW_CNT_EN : 1; - unsigned int COMPRESSION_EN : 1; - unsigned int ALPHA_IS_ON_MSB : 1; - unsigned int COLOR_TRANSFORM : 1; - unsigned int LOST_ALPHA_BITS : 4; - unsigned int LOST_COLOR_BITS : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_RSRC_WORD7 { - struct { - unsigned int META_DATA_ADDRESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_SAMP_WORD0 { - struct { - unsigned int CLAMP_X : 3; - unsigned int CLAMP_Y : 3; - unsigned int CLAMP_Z : 3; - unsigned int MAX_ANISO_RATIO : 3; - unsigned int 
DEPTH_COMPARE_FUNC : 3; - unsigned int FORCE_UNNORMALIZED : 1; - unsigned int ANISO_THRESHOLD : 3; - unsigned int MC_COORD_TRUNC : 1; - unsigned int FORCE_DEGAMMA : 1; - unsigned int ANISO_BIAS : 6; - unsigned int TRUNC_COORD : 1; - unsigned int DISABLE_CUBE_WRAP : 1; - unsigned int FILTER_MODE : 2; - unsigned int COMPAT_MODE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_SAMP_WORD1 { - struct { - unsigned int MIN_LOD : 12; - unsigned int MAX_LOD : 12; - unsigned int PERF_MIP : 4; - unsigned int PERF_Z : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_SAMP_WORD2 { - struct { - unsigned int LOD_BIAS : 14; - unsigned int LOD_BIAS_SEC : 6; - unsigned int XY_MAG_FILTER : 2; - unsigned int XY_MIN_FILTER : 2; - unsigned int Z_FILTER : 2; - unsigned int MIP_FILTER : 2; - unsigned int MIP_POINT_PRECLAMP : 1; - unsigned int BLEND_ZERO_PRT : 1; - unsigned int FILTER_PREC_FIX : 1; - unsigned int ANISO_OVERRIDE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IMG_SAMP_WORD3 { - struct { - unsigned int BORDER_COLOR_PTR : 12; - unsigned int SKIP_DEGAMMA : 1; - unsigned int : 17; - unsigned int BORDER_COLOR_TYPE : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_DATA { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_INDEX { - struct { - unsigned int : 16; - unsigned int INDEX : 16; - } bits, bitfields; - struct { - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int THREAD_ID : 6; - unsigned int AUTO_INCR : 1; - unsigned int FORCE_READ : 1; - unsigned int READ_TIMEOUT : 1; - unsigned int UNINDEXED : 1; - unsigned int : 16; - } gfx09; - struct { - unsigned int WAVE_ID : 5; - unsigned int WORKITEM_ID : 6; - unsigned int AUTO_INCR : 1; - unsigned 
int : 20; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INST { - struct { - unsigned int ENCODING : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_AUTO_MASK { - struct { - unsigned int MASK : 24; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_MSG_CTRL { - struct { - unsigned int STALL : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_AUTO_CTXID { - struct { - unsigned int THREAD_TRACE : 1; - unsigned int WLT : 1; - unsigned int THREAD_TRACE_BUF_FULL : 1; - unsigned int REG_TIMESTAMP : 1; - unsigned int CMD_TIMESTAMP : 1; - unsigned int HOST_CMD_OVERFLOW : 1; - unsigned int HOST_REG_OVERFLOW : 1; - unsigned int IMMED_OVERFLOW : 1; - unsigned int THREAD_TRACE_UTC_ERROR : 1; - unsigned int : 15; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_AUTO_HI { - struct { - unsigned int : 8; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_AUTO_LO { - struct { - unsigned int THREAD_TRACE : 1; - unsigned int WLT : 1; - unsigned int THREAD_TRACE_BUF_FULL : 1; - unsigned int REG_TIMESTAMP : 1; - unsigned int CMD_TIMESTAMP : 1; - unsigned int HOST_CMD_OVERFLOW : 1; - unsigned int HOST_REG_OVERFLOW : 1; - unsigned int IMMED_OVERFLOW : 1; - unsigned int THREAD_TRACE_UTC_ERROR : 1; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_CMN_CTXID { - struct { - unsigned int : 24; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 
4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_CMN_HI { - struct { - unsigned int : 8; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_WAVE_CTXID { - struct { - unsigned int DATA : 12; - unsigned int SH_ID : 1; - unsigned int PRIV : 1; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int CU_ID : 4; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_WAVE_HI { - struct { - unsigned int CU_ID : 4; - unsigned int VM_ID : 4; - unsigned int SE_ID : 2; - unsigned int ENCODING : 2; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_INTERRUPT_WORD_WAVE_LO { - struct { - unsigned int DATA : 24; - unsigned int SH_ID : 1; - unsigned int PRIV : 1; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR0_CU { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR1_CU { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR2_CU { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR3_CU { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR_CTRL { - struct { - 
unsigned int START : 1; - unsigned int LOAD : 1; - unsigned int CLEAR : 1; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR_SEL { - struct { - unsigned int SEL0 : 4; - unsigned int SEL1 : 4; - unsigned int SEL2 : 4; - unsigned int SEL3 : 4; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR_SEL0 { - struct { - unsigned int SEL0 : 8; - unsigned int : 7; - unsigned int DIV0 : 1; - unsigned int SEL1 : 8; - unsigned int : 7; - unsigned int DIV1 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_CTR_SEL1 { - struct { - unsigned int SEL2 : 8; - unsigned int : 7; - unsigned int DIV2 : 1; - unsigned int SEL3 : 8; - unsigned int : 7; - unsigned int DIV3 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_DATA0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_DATA1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_DATA2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LB_DATA3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_LDS_CLK_CTRL { - struct { - unsigned int FORCE_CU_ON_SH0 : 16; - unsigned int FORCE_CU_ON_SH1 : 16; - } gfx09; - struct { - unsigned int FORCE_WGP_ON_SA0 : 16; - unsigned int FORCE_WGP_ON_SA1 : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_M0_GPR_IDX_WORD { - struct { - unsigned int INDEX : 8; - unsigned int : 4; - unsigned int VSRC0_REL : 1; - unsigned int VSRC1_REL : 1; - unsigned int 
VSRC2_REL : 1; - unsigned int VDST_REL : 1; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MIMG_0 { - struct { - unsigned int OPM : 1; - unsigned int : 7; - unsigned int DMASK : 4; - unsigned int UNORM : 1; - unsigned int GLC : 1; - unsigned int DA : 1; - unsigned int A16 : 1; - unsigned int TFE : 1; - unsigned int LWE : 1; - unsigned int OP : 7; - unsigned int SLC : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MIMG_1 { - struct { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int SSAMP : 5; - unsigned int : 5; - unsigned int D16 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MTBUF_0 { - struct { - unsigned int OFFSET : 12; - unsigned int OFFEN : 1; - unsigned int IDXEN : 1; - unsigned int GLC : 1; - unsigned int OP : 4; - unsigned int DFMT : 4; - unsigned int NFMT : 3; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MTBUF_1 { - struct { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int : 1; - unsigned int SLC : 1; - unsigned int TFE : 1; - unsigned int SOFFSET : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MUBUF_0 { - struct { - unsigned int OFFSET : 12; - unsigned int OFFEN : 1; - unsigned int IDXEN : 1; - unsigned int GLC : 1; - unsigned int : 1; - unsigned int LDS : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_MUBUF_1 { - struct { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int : 2; - unsigned int TFE : 1; - unsigned int SOFFSET : 8; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER4_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER4_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER4_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int 
SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER5_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER5_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER5_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER6_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER6_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER6_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER7_HI { - struct { - unsigned int 
PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER7_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER7_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER8_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER8_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER8_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER9_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER9_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER9_SELECT { - struct { - unsigned 
int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER10_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER10_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER10_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER11_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER11_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER11_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; 
- unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER12_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER12_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER12_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER13_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER13_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER13_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER14_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER14_LO { 
- struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER14_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER15_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER15_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER15_SELECT { - struct { - unsigned int PERF_SEL : 9; - unsigned int : 11; - unsigned int SPM_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int : 12; - unsigned int SQC_BANK_MASK : 4; - unsigned int : 16; - } most; - struct { - unsigned int : 16; - unsigned int SQC_CLIENT_MASK : 4; - unsigned int : 4; - unsigned int SIMD_MASK : 4; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER_CTRL { - struct { - unsigned int PS_EN : 1; - unsigned int : 1; - unsigned int GS_EN : 1; - unsigned int : 1; - unsigned int HS_EN : 1; - unsigned int : 1; - unsigned int CS_EN : 1; - unsigned int : 25; - } bits, bitfields; - struct { - unsigned int : 3; - unsigned int ES_EN : 1; - unsigned int : 1; - unsigned int LS_EN : 1; - unsigned int : 26; - } most; - struct { - unsigned int : 8; - unsigned int CNTR_RATE : 5; - unsigned int : 19; - } gfx09; - struct { - unsigned int : 1; - unsigned 
int VS_EN : 1; - unsigned int : 11; - unsigned int DISABLE_FLUSH : 1; - unsigned int : 18; - } gfx09_10; - struct { - unsigned int : 8; - unsigned int CNTR_RATE : 2; - unsigned int : 22; - } gfx10; - struct { - unsigned int : 14; - unsigned int DISABLE_ME0PIPE0_PERF : 1; - unsigned int DISABLE_ME0PIPE1_PERF : 1; - unsigned int DISABLE_ME1PIPE0_PERF : 1; - unsigned int DISABLE_ME1PIPE1_PERF : 1; - unsigned int DISABLE_ME1PIPE2_PERF : 1; - unsigned int DISABLE_ME1PIPE3_PERF : 1; - unsigned int : 12; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER_CTRL2 { - struct { - unsigned int FORCE_EN : 1; - unsigned int : 31; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 1; - unsigned int VMID_EN : 16; - unsigned int : 15; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_PERFCOUNTER_MASK { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQ_PERF_SNAPSHOT_CTRL { - struct { - unsigned int TIMER_ON_OFF : 1; - unsigned int VMID_MASK : 16; - unsigned int COUNT_SEL : 1; - unsigned int COUNT_INTERVAL : 4; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQ_PERF_SNAPSHOT_DATA { - struct { - unsigned int VALID : 1; - unsigned int WAVE_ISSUE : 1; - unsigned int INST_TYPE : 4; - unsigned int NO_ISSUE_REASON : 3; - unsigned int ARB_STATE : 14; - unsigned int SAMPLING_ERR : 1; - unsigned int WAVE_ID : 5; - unsigned int : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| 
CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQ_PERF_SNAPSHOT_PC_HI { - struct { - unsigned int PC_HI : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union SQ_PERF_SNAPSHOT_PC_LO { - struct { - unsigned int PC_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union SQ_POWER_THROTTLE { - struct { - unsigned int MIN_POWER : 14; - unsigned int : 2; - unsigned int MAX_POWER : 14; - unsigned int PHASE_OFFSET : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_POWER_THROTTLE2 { - struct { - unsigned int MAX_POWER_DELTA : 14; - unsigned int : 2; - unsigned int SHORT_TERM_INTERVAL_SIZE : 10; - unsigned int : 1; - unsigned int LONG_TERM_INTERVAL_RATIO : 4; - unsigned int USE_REF_CLOCK : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_RANDOM_WAVE_PRI { - struct { - unsigned int RET : 7; - unsigned int RUI : 3; - unsigned int : 22; - } bits, bitfields; - struct { - unsigned int : 10; - unsigned int RNG : 13; - unsigned int : 9; - } gfx09; - struct { - unsigned int : 31; - unsigned int FORCE_IB_ARB_PRIO_MSK_VALID : 1; - } gfx103Plus; - struct { - unsigned int : 10; - unsigned int RNG : 14; - unsigned int : 8; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_REG_CREDITS { - struct { - unsigned int SRBM_CREDITS : 6; - unsigned int : 2; - unsigned int CMD_CREDITS : 4; - unsigned int : 16; - unsigned int REG_BUSY : 1; - unsigned int SRBM_OVERFLOW : 1; - unsigned int IMMED_OVERFLOW : 1; - unsigned int CMD_OVERFLOW : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_REG_TIMESTAMP { - struct { - unsigned int TIMESTAMP : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SQ_RUNTIME_CONFIG { - struct { - unsigned int ENABLE_TEX_ARB_OLDEST : 1; - unsigned int : 31; - } gfx09; - struct { - unsigned int UNUSED_REGISTER : 1; - unsigned int : 31; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SCRATCH_0 { - struct { - unsigned int OFFSET : 13; - unsigned int LDS : 1; - unsigned int SEG : 2; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int : 1; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SCRATCH_1 { - struct { - unsigned int ADDR : 8; - unsigned int DATA : 8; - unsigned int SADDR : 7; - unsigned int NV : 1; - unsigned int VDST : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SHADER_TBA_HI { - struct { - unsigned int ADDR_HI : 8; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 31; - unsigned int TRAP_EN : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SHADER_TBA_LO { - struct { - unsigned int ADDR_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SHADER_TMA_HI { - struct { - unsigned int ADDR_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SHADER_TMA_LO { - struct { - unsigned int ADDR_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SMEM_0 { - struct { - unsigned int SBASE : 6; - unsigned int SDATA : 7; - unsigned int : 1; - unsigned int SOFFSET_EN : 1; - unsigned int NV : 1; - unsigned int GLC : 1; - unsigned int IMM : 1; - unsigned int OP : 8; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SMEM_1 { - struct { - unsigned 
int OFFSET : 21; - unsigned int : 4; - unsigned int SOFFSET : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SOP1 { - struct { - unsigned int SSRC0 : 8; - unsigned int OP : 8; - unsigned int SDST : 7; - unsigned int ENCODING : 9; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SOP2 { - struct { - unsigned int SSRC0 : 8; - unsigned int SSRC1 : 8; - unsigned int SDST : 7; - unsigned int OP : 7; - unsigned int ENCODING : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SOPC { - struct { - unsigned int SSRC0 : 8; - unsigned int SSRC1 : 8; - unsigned int OP : 7; - unsigned int ENCODING : 9; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SOPK { - struct { - unsigned int SIMM16 : 16; - unsigned int SDST : 7; - unsigned int OP : 5; - unsigned int ENCODING : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_SOPP { - struct { - unsigned int SIMM16 : 16; - unsigned int OP : 7; - unsigned int ENCODING : 9; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_TEX_CLK_CTRL { - struct { - unsigned int FORCE_CU_ON_SH0 : 16; - unsigned int FORCE_CU_ON_SH1 : 16; - } gfx09; - struct { - unsigned int FORCE_WGP_ON_SA0 : 16; - unsigned int FORCE_WGP_ON_SA1 : 16; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BASE { - struct { - unsigned int ADDR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BASE2 { - struct { - unsigned int ADDR_HI : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BUF0_BASE { - struct { - unsigned int BASE_LO : 32; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BUF0_SIZE { - struct { - unsigned int BASE_HI : 4; - unsigned int : 4; - unsigned int SIZE : 22; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BUF1_BASE { - struct { - unsigned int BASE_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_BUF1_SIZE { - struct { - unsigned int BASE_HI : 4; - unsigned int : 4; - unsigned int SIZE : 22; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_CTRL { - struct { - unsigned int : 31; - unsigned int RESET_BUFFER : 1; - } gfx09; - struct { - unsigned int : 3; - unsigned int CH_PERF_EN : 1; - unsigned int : 5; - unsigned int REG_STALL_EN : 1; - unsigned int SPI_STALL_EN : 1; - unsigned int SQ_STALL_EN : 1; - unsigned int REG_DROP_ON_STALL : 1; - unsigned int : 17; - unsigned int CAPTURE_ALL : 1; - unsigned int : 1; - } gfx10; - struct { - unsigned int : 28; - unsigned int AUTO_FLUSH_PADDING_DIS : 1; - unsigned int : 3; - } gfx103Plus; - struct { - unsigned int : 20; - unsigned int LOWATER_OFFSET : 3; - unsigned int : 6; - unsigned int AUTO_FLUSH_MODE : 1; - unsigned int : 2; - } gfx103PlusExclusive; - struct { - unsigned int MODE : 2; - unsigned int ALL_VMID : 1; - unsigned int : 1; - unsigned int INTERRUPT_EN : 1; - unsigned int DOUBLE_BUFFER : 1; - unsigned int HIWATER : 3; - unsigned int : 4; - unsigned int UTIL_TIMER : 1; - unsigned int WAVESTART_MODE : 2; - unsigned int RT_FREQ : 2; - unsigned int SYNC_COUNT_MARKERS : 1; - unsigned int SYNC_COUNT_DRAWS : 1; - unsigned int : 11; - unsigned int DRAW_EVENT_EN : 1; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| 
CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 3; - unsigned int GL1_PERF_EN : 1; - unsigned int : 5; - unsigned int REG_AT_HWM : 2; - unsigned int SPI_STALL_EN : 1; - unsigned int SQ_STALL_EN : 1; - unsigned int : 19; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_DROPPED_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_GFX_DRAW_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_GFX_MARKER_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_HIWATER { - struct { - unsigned int HIWATER : 3; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_HP3D_DRAW_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_HP3D_MARKER_CNTR { - struct { - unsigned int CNTR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_MASK { - struct { - unsigned int CU_SEL : 5; - unsigned int SH_SEL : 1; - unsigned int : 1; - unsigned int REG_STALL_EN : 1; - unsigned int SIMD_EN : 4; - unsigned int VM_ID_MASK : 2; - unsigned int SPI_STALL_EN : 1; - unsigned int SQ_STALL_EN : 1; - unsigned int : 16; - } gfx09; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 17; - unsigned int EXCLUDE_NONDETAIL_SHADERDATA : 1; - unsigned int : 14; - } gfx104Plus; -#endif - struct { - unsigned int SIMD_SEL : 2; - unsigned int : 2; - unsigned int WGP_SEL : 4; - unsigned int : 1; - unsigned int SA_SEL : 1; - unsigned int 
WTYPE_INCLUDE : 7; - unsigned int : 15; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_MODE { - struct { - unsigned int MASK_PS : 3; - unsigned int MASK_VS : 3; - unsigned int MASK_GS : 3; - unsigned int MASK_ES : 3; - unsigned int MASK_HS : 3; - unsigned int MASK_LS : 3; - unsigned int MASK_CS : 3; - unsigned int MODE : 2; - unsigned int CAPTURE_MODE : 2; - unsigned int AUTOFLUSH_EN : 1; - unsigned int TC_PERF_EN : 1; - unsigned int ISSUE_MASK : 2; - unsigned int TEST_MODE : 1; - unsigned int INTERRUPT_EN : 1; - unsigned int WRAP : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_PERF_MASK { - struct { - unsigned int SH0_MASK : 16; - unsigned int SH1_MASK : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_SIZE { - struct { - unsigned int SIZE : 22; - unsigned int : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_STATUS { - struct { - unsigned int FINISH_PENDING : 10; - unsigned int : 6; - unsigned int FINISH_DONE : 10; - unsigned int : 2; - unsigned int UTC_ERROR : 1; - unsigned int NEW_BUF : 1; - unsigned int BUSY : 1; - unsigned int FULL : 1; - } gfx09; - struct { - unsigned int : 24; - unsigned int UTC_ERR : 1; - unsigned int : 7; - } gfx10; - struct { - unsigned int : 28; - unsigned int OWNER_VMID : 4; - } gfx103PlusExclusive; - struct { - unsigned int : 26; - unsigned int EVENT_CNTR_OVERFLOW : 1; - unsigned int EVENT_CNTR_STALL : 1; - unsigned int : 4; - } gfx10Core; - struct { - unsigned int FINISH_PENDING : 12; - unsigned int FINISH_DONE : 12; - unsigned int : 1; - unsigned int BUSY : 1; - unsigned int : 6; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 24; - unsigned int WRITE_ERROR : 1; - unsigned int : 7; - } gfx11; -#endif - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_STATUS2 { - struct { - unsigned int BUF0_FULL : 1; - unsigned int BUF1_FULL : 1; - unsigned int : 2; - unsigned int PACKET_LOST_BUF_NO_LOCKDOWN : 1; - unsigned int : 27; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 8; - unsigned int BUF_ISSUE_STATUS : 5; - unsigned int BUF_ISSUE : 1; - unsigned int WRITE_BUF_FULL : 1; - unsigned int : 17; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_TOKEN_MASK { - struct { - unsigned int TOKEN_MASK : 16; - unsigned int REG_MASK : 8; - unsigned int REG_DROP_ON_STALL : 1; - unsigned int : 7; - } gfx09; - struct { - unsigned int TOKEN_EXCLUDE : 12; - unsigned int : 20; - } gfx101; - struct { - unsigned int TOKEN_EXCLUDE : 11; - unsigned int : 1; - unsigned int BOP_EVENTS_TOKEN_INCLUDE : 1; - unsigned int : 13; - unsigned int REG_EXCLUDE : 3; - unsigned int : 3; - } gfx103PlusExclusive; - struct { - unsigned int : 16; - unsigned int REG_INCLUDE : 8; - unsigned int INST_EXCLUDE : 2; - unsigned int : 5; - unsigned int REG_DETAIL_ALL : 1; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 11; - unsigned int TTRACE_EXEC : 1; - unsigned int : 20; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_TOKEN_MASK2 { - struct { - unsigned int INST_MASK : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_0 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_1 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SQ_THREAD_TRACE_USERDATA_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_3 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_4 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_5 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_6 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_USERDATA_7 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_CMN { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_EVENT { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SH_ID : 1; - unsigned int STAGE : 3; - unsigned int : 1; - unsigned int EVENT_TYPE : 6; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_INST { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int INST_TYPE : 5; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int : 
4; - unsigned int TRAP_ERROR : 1; - unsigned int PC_LO : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_INST_PC_2_OF_2 { - struct { - unsigned int PC_HI : 24; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SH_ID : 1; - unsigned int CU_ID : 4; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int DATA_LO : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2 { - struct { - unsigned int DATA_HI : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_ISSUE { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SIMD_ID : 2; - unsigned int : 1; - unsigned int INST0 : 2; - unsigned int INST1 : 2; - unsigned int INST2 : 2; - unsigned int INST3 : 2; - unsigned int INST4 : 2; - unsigned int INST5 : 2; - unsigned int INST6 : 2; - unsigned int INST7 : 2; - unsigned int INST8 : 2; - unsigned int INST9 : 2; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_MISC { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 8; - unsigned int SH_ID : 1; - unsigned int MISC_TOKEN_TYPE : 3; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_PERF_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SH_ID : 1; - unsigned int CU_ID : 4; - unsigned int CNTR_BANK : 2; - unsigned int CNTR0 : 13; - unsigned int CNTR1_LO : 7; - } bits, bitfields; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_PERF_2_OF_2 { - struct { - unsigned int CNTR1_HI : 6; - unsigned int CNTR2 : 13; - unsigned int CNTR3 : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_REG_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int PIPE_ID : 2; - unsigned int ME_ID : 2; - unsigned int REG_DROPPED_PREV : 1; - unsigned int REG_TYPE : 3; - unsigned int : 1; - unsigned int REG_PRIV : 1; - unsigned int REG_OP : 1; - unsigned int REG_ADDR : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_REG_2_OF_2 { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int PIPE_ID : 2; - unsigned int ME_ID : 2; - unsigned int REG_ADDR : 7; - unsigned int DATA_LO : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_REG_CS_2_OF_2 { - struct { - unsigned int DATA_HI : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2 { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int : 12; - unsigned int TIME_LO : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2 { - struct { - unsigned int TIME_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_WAVE { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SH_ID : 1; - unsigned int CU_ID : 4; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - 
unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WORD_WAVE_START { - struct { - unsigned int TOKEN_TYPE : 4; - unsigned int TIME_DELTA : 1; - unsigned int SH_ID : 1; - unsigned int CU_ID : 4; - unsigned int WAVE_ID : 4; - unsigned int SIMD_ID : 2; - unsigned int DISPATCHER : 5; - unsigned int VS_NO_ALLOC_OR_GROUPED : 1; - unsigned int COUNT : 7; - unsigned int TG_ID : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_THREAD_TRACE_WPTR { - struct { - unsigned int WPTR : 30; - unsigned int READ_OFFSET : 2; - } gfx09; - struct { - unsigned int OFFSET : 29; - unsigned int : 2; - unsigned int BUFFER_ID : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_TIME_HI { - struct { - unsigned int TIME : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_TIME_LO { - struct { - unsigned int TIME : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_UTCL1_CNTL1 { - struct { - unsigned int FORCE_4K_L2_RESP : 1; - unsigned int GPUVM_64K_DEF : 1; - unsigned int GPUVM_PERM_MODE : 1; - unsigned int RESP_MODE : 2; - unsigned int RESP_FAULT_MODE : 2; - unsigned int CLIENTID : 9; - unsigned int USERVM_DIS : 1; - unsigned int ENABLE_PUSH_LFIFO : 1; - unsigned int ENABLE_LFIFO_PRI_ARB : 1; - unsigned int REG_INVALIDATE_VMID : 4; - unsigned int REG_INVALIDATE_ALL_VMID : 1; - unsigned int REG_INVALIDATE_TOGGLE : 1; - unsigned int REG_INVALIDATE_ALL : 1; - unsigned int FORCE_MISS : 1; - unsigned int FORCE_IN_ORDER : 1; - unsigned int REDUCE_FIFO_DEPTH_BY_2 : 2; - unsigned int REDUCE_CACHE_SIZE_BY_2 : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_UTCL1_CNTL2 { - struct { - unsigned int SPARE : 8; - unsigned int LFIFO_SCAN_DISABLE : 1; - unsigned int MTYPE_OVRD_DIS : 1; - unsigned int 
LINE_VALID : 1; - unsigned int DIS_EDC : 1; - unsigned int GPUVM_INV_MODE : 1; - unsigned int SHOOTDOWN_OPT : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int FORCE_GPUVM_INV_ACK : 1; - unsigned int RETRY_TIMER : 7; - unsigned int : 3; - unsigned int FORCE_FRAG_2M_TO_64K : 1; - unsigned int : 1; - unsigned int PREFETCH_PAGE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_UTCL1_STATUS { - struct { - unsigned int FAULT_DETECTED : 1; - unsigned int RETRY_DETECTED : 1; - unsigned int PRT_DETECTED : 1; - unsigned int RESERVED : 13; - unsigned int UNUSED : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VINTRP { - struct { - unsigned int VSRC : 8; - unsigned int ATTRCHAN : 2; - unsigned int ATTR : 6; - unsigned int OP : 2; - unsigned int VDST : 8; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP1 { - struct { - unsigned int SRC0 : 9; - unsigned int OP : 8; - unsigned int VDST : 8; - unsigned int ENCODING : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP2 { - struct { - unsigned int SRC0 : 9; - unsigned int VSRC1 : 8; - unsigned int VDST : 8; - unsigned int OP : 6; - unsigned int ENCODING : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP3P_0 { - struct { - unsigned int VDST : 8; - unsigned int NEG_HI : 3; - unsigned int OP_SEL : 3; - unsigned int OP_SEL_HI_2 : 1; - unsigned int CLAMP : 1; - unsigned int OP : 7; - unsigned int ENCODING : 9; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP3P_1 { - struct { - unsigned int SRC0 : 9; - unsigned int SRC1 : 9; - unsigned int SRC2 : 9; - unsigned int OP_SEL_HI : 2; - unsigned int NEG : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union SQ_VOP3_0 { - struct { - unsigned int VDST : 8; - unsigned int ABS : 3; - unsigned int OP_SEL : 4; - unsigned int CLAMP : 1; - unsigned int OP : 10; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP3_0_SDST_ENC { - struct { - unsigned int VDST : 8; - unsigned int SDST : 7; - unsigned int CLAMP : 1; - unsigned int OP : 10; - unsigned int ENCODING : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP3_1 { - struct { - unsigned int SRC0 : 9; - unsigned int SRC1 : 9; - unsigned int SRC2 : 9; - unsigned int OMOD : 2; - unsigned int NEG : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOPC { - struct { - unsigned int SRC0 : 9; - unsigned int VSRC1 : 8; - unsigned int OP : 8; - unsigned int ENCODING : 7; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP_DPP { - struct { - unsigned int SRC0 : 8; - unsigned int DPP_CTRL : 9; - unsigned int : 2; - unsigned int BOUND_CTRL : 1; - unsigned int SRC0_NEG : 1; - unsigned int SRC0_ABS : 1; - unsigned int SRC1_NEG : 1; - unsigned int SRC1_ABS : 1; - unsigned int BANK_MASK : 4; - unsigned int ROW_MASK : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_VOP_SDWA { - struct { - unsigned int SRC0 : 8; - unsigned int DST_SEL : 3; - unsigned int DST_UNUSED : 2; - unsigned int CLAMP : 1; - unsigned int OMOD : 2; - unsigned int SRC0_SEL : 3; - unsigned int SRC0_SEXT : 1; - unsigned int SRC0_NEG : 1; - unsigned int SRC0_ABS : 1; - unsigned int : 1; - unsigned int S0 : 1; - unsigned int SRC1_SEL : 3; - unsigned int SRC1_SEXT : 1; - unsigned int SRC1_NEG : 1; - unsigned int SRC1_ABS : 1; - unsigned int : 1; - unsigned int S1 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
SQ_VOP_SDWA_SDST_ENC { - struct { - unsigned int SRC0 : 8; - unsigned int SDST : 7; - unsigned int SD : 1; - unsigned int SRC0_SEL : 3; - unsigned int SRC0_SEXT : 1; - unsigned int SRC0_NEG : 1; - unsigned int SRC0_ABS : 1; - unsigned int : 1; - unsigned int S0 : 1; - unsigned int SRC1_SEL : 3; - unsigned int SRC1_SEXT : 1; - unsigned int SRC1_NEG : 1; - unsigned int SRC1_ABS : 1; - unsigned int : 1; - unsigned int S1 : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH0_ADDR_H { - struct { - unsigned int ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH0_ADDR_L { - struct { - unsigned int : 6; - unsigned int ADDR : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH0_CNTL { - struct { - unsigned int MASK : 24; - unsigned int VMID : 4; - unsigned int : 3; - unsigned int VALID : 1; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int MODE : 2; - unsigned int : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH1_ADDR_H { - struct { - unsigned int ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH1_ADDR_L { - struct { - unsigned int : 6; - unsigned int ADDR : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH1_CNTL { - struct { - unsigned int MASK : 24; - unsigned int VMID : 4; - unsigned int : 3; - unsigned int VALID : 1; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int MODE : 2; - unsigned int : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH2_ADDR_H { - struct { - unsigned int ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union SQ_WATCH2_ADDR_L { - struct { - unsigned int : 6; - unsigned int ADDR : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH2_CNTL { - struct { - unsigned int MASK : 24; - unsigned int VMID : 4; - unsigned int : 3; - unsigned int VALID : 1; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int MODE : 2; - unsigned int : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH3_ADDR_H { - struct { - unsigned int ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH3_ADDR_L { - struct { - unsigned int : 6; - unsigned int ADDR : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WATCH3_CNTL { - struct { - unsigned int MASK : 24; - unsigned int VMID : 4; - unsigned int : 3; - unsigned int VALID : 1; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int MODE : 2; - unsigned int : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WREXEC_EXEC_HI { - struct { - unsigned int ADDR_HI : 16; - unsigned int : 10; - unsigned int FIRST_WAVE : 1; - unsigned int : 1; - unsigned int MTYPE : 3; - unsigned int MSB : 1; - } most; - struct { - unsigned int : 27; - unsigned int ATC : 1; - unsigned int : 4; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SQ_WREXEC_EXEC_LO { - struct { - unsigned int ADDR_LO : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_BLEND_OPT_CONTROL { - struct { - unsigned int MRT0_COLOR_OPT_DISABLE : 1; - unsigned int MRT0_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT1_COLOR_OPT_DISABLE : 1; - unsigned int MRT1_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT2_COLOR_OPT_DISABLE : 1; - unsigned int MRT2_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - 
unsigned int MRT3_COLOR_OPT_DISABLE : 1; - unsigned int MRT3_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT4_COLOR_OPT_DISABLE : 1; - unsigned int MRT4_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT5_COLOR_OPT_DISABLE : 1; - unsigned int MRT5_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT6_COLOR_OPT_DISABLE : 1; - unsigned int MRT6_ALPHA_OPT_DISABLE : 1; - unsigned int : 2; - unsigned int MRT7_COLOR_OPT_DISABLE : 1; - unsigned int MRT7_ALPHA_OPT_DISABLE : 1; - unsigned int : 1; - unsigned int PIXEN_ZERO_OPT_DISABLE : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_BLEND_OPT_EPSILON { - struct { - unsigned int MRT0_EPSILON : 4; - unsigned int MRT1_EPSILON : 4; - unsigned int MRT2_EPSILON : 4; - unsigned int MRT3_EPSILON : 4; - unsigned int MRT4_EPSILON : 4; - unsigned int MRT5_EPSILON : 4; - unsigned int MRT6_EPSILON : 4; - unsigned int MRT7_EPSILON : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT0_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT1_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT2_BLEND_OPT { - struct { - 
unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT3_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT4_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT5_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT6_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int 
ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_MRT7_BLEND_OPT { - struct { - unsigned int COLOR_SRC_OPT : 3; - unsigned int : 1; - unsigned int COLOR_DST_OPT : 3; - unsigned int : 1; - unsigned int COLOR_COMB_FCN : 3; - unsigned int : 5; - unsigned int ALPHA_SRC_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_DST_OPT : 3; - unsigned int : 1; - unsigned int ALPHA_COMB_FCN : 3; - unsigned int : 5; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx09_1x; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT2 : 10; - unsigned 
int PERFCOUNTER_SELECT3 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx09_1x; - struct { - unsigned int PERFCOUNTER_SELECT2 : 10; - unsigned int PERFCOUNTER_SELECT3 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 24; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx09_1x; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER1_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT2 : 10; - unsigned int PERFCOUNTER_SELECT3 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx09_1x; - struct { - unsigned int 
PERFCOUNTER_SELECT2 : 10; - unsigned int PERFCOUNTER_SELECT3 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER2_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx09_1x; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PERFCOUNTER3_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 8; - } bits, bitfields; - struct { - unsigned int : 28; - unsigned int PERF_MODE : 4; - } most; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int PERF_SEL 
: 10; - unsigned int : 22; - } gfx09_1x; - struct { - unsigned int PERFCOUNTER_SELECT : 10; - unsigned int PERFCOUNTER_SELECT1 : 10; - unsigned int : 12; - } gfx101; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PS_DOWNCONVERT { - struct { - unsigned int MRT0 : 4; - unsigned int MRT1 : 4; - unsigned int MRT2 : 4; - unsigned int MRT3 : 4; - unsigned int MRT4 : 4; - unsigned int MRT5 : 4; - unsigned int MRT6 : 4; - unsigned int MRT7 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union SX_PS_DOWNCONVERT_CONTROL { - struct { - unsigned int MRT0_FMT_MAPPING_DISABLE : 1; - unsigned int MRT1_FMT_MAPPING_DISABLE : 1; - unsigned int MRT2_FMT_MAPPING_DISABLE : 1; - unsigned int MRT3_FMT_MAPPING_DISABLE : 1; - unsigned int MRT4_FMT_MAPPING_DISABLE : 1; - unsigned int MRT5_FMT_MAPPING_DISABLE : 1; - unsigned int MRT6_FMT_MAPPING_DISABLE : 1; - unsigned int MRT7_FMT_MAPPING_DISABLE : 1; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_BC_BASE_ADDR { - struct { - unsigned int ADDRESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_BC_BASE_ADDR_HI { - struct { - unsigned int ADDRESS : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_CGTT_CTRL { - struct { - unsigned int ON_DELAY : 4; - unsigned int OFF_HYSTERESIS : 8; - unsigned int : 4; - unsigned int SOFT_STALL_OVERRIDE7 : 1; - unsigned int SOFT_STALL_OVERRIDE6 : 1; - unsigned int SOFT_STALL_OVERRIDE5 : 1; - unsigned int SOFT_STALL_OVERRIDE4 : 1; - unsigned int SOFT_STALL_OVERRIDE3 : 1; - unsigned int SOFT_STALL_OVERRIDE2 : 1; - unsigned int SOFT_STALL_OVERRIDE1 : 1; - unsigned int SOFT_STALL_OVERRIDE0 : 1; - unsigned int SOFT_OVERRIDE7 : 1; - unsigned int 
SOFT_OVERRIDE6 : 1; - unsigned int SOFT_OVERRIDE5 : 1; - unsigned int SOFT_OVERRIDE4 : 1; - unsigned int SOFT_OVERRIDE3 : 1; - unsigned int SOFT_OVERRIDE2 : 1; - unsigned int SOFT_OVERRIDE1 : 1; - unsigned int SOFT_OVERRIDE0 : 1; - } core; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_CNTL { - struct { - unsigned int : 16; - unsigned int ALIGNER_CREDIT : 5; - unsigned int : 11; - } bits, bitfields; - struct { - unsigned int FX_XNACK_CREDIT : 7; - unsigned int : 25; - } most; - struct { - unsigned int : 22; - unsigned int TD_FIFO_CREDIT : 10; - } core; - struct { - unsigned int : 9; - unsigned int SQ_XNACK_CREDIT : 4; - unsigned int TC_DATA_CREDIT : 3; - unsigned int : 16; - } gfx09; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int TA_SQ_XNACK_FGCG_DISABLE : 1; - unsigned int : 31; - } gfx104Plus; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int TA_SQ_XNACK_FGCG_DISABLE : 1; - unsigned int : 31; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int TA_SQ_XNACK_FGCG_DISABLE : 1; - unsigned int : 31; - } nv24; -#endif - struct { - unsigned int TA_SQ_XNACK_FGCG_DISABLE : 1; - unsigned int : 31; - } raphael; - struct { - unsigned int TA_SQ_XNACK_FGCG_DISABLE : 1; - unsigned int : 31; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union TA_CNTL2 { - struct { - unsigned int : 16; - unsigned int POINT_SAMPLE_ACCEL_DIS : 1; - unsigned int : 2; - unsigned int ELIMINATE_UNLIT_QUAD_DIS : 1; - unsigned int : 12; - } bits, bitfields; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 17; - unsigned int ELEMSIZE_HASH_DIS : 1; - unsigned int TRUNCATE_COORD_MODE : 1; - unsigned int : 13; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union TA_CNTL_AUX { - struct { - 
unsigned int SCOAL_DSWIZZLE_N : 1; - unsigned int : 4; - unsigned int TFAULT_EN_OVERRIDE : 1; - unsigned int : 1; - unsigned int DISABLE_GATHER4_BC_SWIZZLE : 1; - unsigned int : 2; - unsigned int ANISO_HALF_THRESH : 2; - unsigned int ANISO_ERROR_FP_VBIAS : 1; - unsigned int ANISO_STEP_ORDER : 1; - unsigned int ANISO_STEP : 1; - unsigned int MINMAG_UNNORM : 1; - unsigned int ANISO_WEIGHT_MODE : 1; - unsigned int ANISO_RATIO_LUT : 1; - unsigned int ANISO_TAP : 1; - unsigned int : 1; - unsigned int DETERMINISM_RESERVED_DISABLE : 1; - unsigned int DETERMINISM_OPCODE_STRICT_DISABLE : 1; - unsigned int DETERMINISM_MISC_DISABLE : 1; - unsigned int DETERMINISM_SAMPLE_C_DFMT_DISABLE : 1; - unsigned int DETERMINISM_SAMPLER_MSAA_DISABLE : 1; - unsigned int DETERMINISM_WRITEOP_READFMT_DISABLE : 1; - unsigned int DETERMINISM_DFMT_NFMT_DISABLE : 1; - unsigned int : 1; - unsigned int CUBEMAP_SLICE_CLAMP : 1; - unsigned int TRUNC_SMALL_NEG : 1; - unsigned int ARRAY_ROUND_MODE : 2; - } bits, bitfields; - struct { - unsigned int : 27; - unsigned int DISABLE_DWORD_X2_COALESCE : 1; - unsigned int : 4; - } most; - struct { - unsigned int : 6; - unsigned int GATHERH_DST_SEL : 1; - unsigned int : 25; - } core; - struct { - unsigned int : 1; - unsigned int RESERVED : 3; - unsigned int : 5; - unsigned int NONIMG_ANISO_BYPASS : 1; - unsigned int : 9; - unsigned int ANISO_MIP_ADJ_MODE : 1; - unsigned int : 12; - } gfx09; - struct { - unsigned int : 1; - unsigned int RESERVED : 3; - unsigned int : 28; - } gfx101; - struct { - unsigned int : 2; - unsigned int CORNER_SAMPLES_MIN_DIM : 1; - unsigned int OVERRIDE_QUAD_MODE_DIS : 1; - unsigned int : 28; - } gfx103Plus; - struct { - unsigned int : 1; - unsigned int DEPTH_AS_PITCH_DIS : 1; - unsigned int : 30; - } gfx103PlusExclusive; - struct { - unsigned int : 4; - unsigned int DERIV_ADJUST_DIS : 1; - unsigned int : 27; - } gfx10CorePlus; - struct { - unsigned int : 8; - unsigned int ANISO_MAG_STEP_CLAMP : 1; - unsigned int AUTO_ALIGN_FORMAT : 1; 
- unsigned int : 22; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_CS_BC_BASE_ADDR { - struct { - unsigned int ADDRESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_CS_BC_BASE_ADDR_HI { - struct { - unsigned int ADDRESS : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_DSM_CNTL { - struct { - unsigned int TA_FS_DFIFO_DSM_IRRITATOR_DATA : 2; - unsigned int TA_FS_DFIFO_ENABLE_SINGLE_WRITE : 1; - unsigned int TA_FS_AFIFO_DSM_IRRITATOR_DATA : 2; - unsigned int TA_FS_AFIFO_ENABLE_SINGLE_WRITE : 1; - unsigned int TA_FL_LFIFO_DSM_IRRITATOR_DATA : 2; - unsigned int TA_FL_LFIFO_ENABLE_SINGLE_WRITE : 1; - unsigned int TA_FX_LFIFO_DSM_IRRITATOR_DATA : 2; - unsigned int TA_FX_LFIFO_ENABLE_SINGLE_WRITE : 1; - unsigned int TA_FS_CFIFO_DSM_IRRITATOR_DATA : 2; - unsigned int TA_FS_CFIFO_ENABLE_SINGLE_WRITE : 1; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_DSM_CNTL2 { - struct { - unsigned int TA_FS_DFIFO_ENABLE_ERROR_INJECT : 2; - unsigned int TA_FS_DFIFO_SELECT_INJECT_DELAY : 1; - unsigned int TA_FS_AFIFO_ENABLE_ERROR_INJECT : 2; - unsigned int TA_FS_AFIFO_SELECT_INJECT_DELAY : 1; - unsigned int TA_FL_LFIFO_ENABLE_ERROR_INJECT : 2; - unsigned int TA_FL_LFIFO_SELECT_INJECT_DELAY : 1; - unsigned int TA_FX_LFIFO_ENABLE_ERROR_INJECT : 2; - unsigned int TA_FX_LFIFO_SELECT_INJECT_DELAY : 1; - unsigned int TA_FS_CFIFO_ENABLE_ERROR_INJECT : 2; - unsigned int TA_FS_CFIFO_SELECT_INJECT_DELAY : 1; - unsigned int : 11; - unsigned int TA_INJECT_DELAY : 6; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_EDC_CNT { - struct { - unsigned int TA_FS_DFIFO_SEC_COUNT : 2; - unsigned int TA_FS_DFIFO_DED_COUNT : 2; - unsigned int TA_FS_AFIFO_SED_COUNT : 2; - unsigned int TA_FL_LFIFO_SED_COUNT : 2; - 
unsigned int TA_FX_LFIFO_SED_COUNT : 2; - unsigned int TA_FS_CFIFO_SED_COUNT : 2; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 8; - unsigned int : 2; - unsigned int PERF_SEL1 : 8; - unsigned int : 14; - } most; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL2 : 8; - unsigned int : 2; - unsigned int PERF_SEL3 : 8; - unsigned int : 14; - } most; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 8; - unsigned int : 24; - } 
most; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 8; - unsigned int : 6; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx09_0; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_POWER_CNTL { - struct { - unsigned int SAMPLER_CLK_VALID_DELAY : 3; - unsigned int SAMPLER_CLK_EN_MODE : 1; - unsigned int : 12; - unsigned int NOSAMPLER_CLK_VALID_DELAY : 3; - unsigned int NOSAMPLER_CLK_EN_MODE : 1; - unsigned int : 12; - } bits, bitfields; - struct { - unsigned int : 4; - unsigned int LOD_CLK_VALID_DELAY : 3; - unsigned int LOD_CLK_EN_MODE : 1; - unsigned int ADDR_CLK_VALID_DELAY : 3; - unsigned int ADDR_CLK_EN_MODE : 1; - unsigned int GRAD_CLK_VALID_DELAY : 3; - unsigned int GRAD_CLK_EN_MODE : 1; - unsigned int : 4; - unsigned int BUFFERFLAT_CLK_VALID_DELAY : 3; - unsigned int BUFFERFLAT_CLK_EN_MODE : 1; - unsigned int WRITEDATA_CLK_VALID_DELAY : 3; - unsigned int WRITEDATA_CLK_EN_MODE : 1; - unsigned int LOWER_CLK_VALID_DELAY : 3; - unsigned int LOWER_CLK_EN_MODE : 1; - } gfx103; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_RESERVED_010C { - struct { - unsigned int Unused : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_SCRATCH { - struct { - unsigned int SCRATCH : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TA_STATUS { - struct { - unsigned int : 12; - unsigned int FG_PFIFO_EMPTYB : 1; - unsigned int FG_LFIFO_EMPTYB : 1; - unsigned int FG_SFIFO_EMPTYB : 1; - unsigned int : 1; - unsigned int FL_PFIFO_EMPTYB : 1; - unsigned int FL_LFIFO_EMPTYB : 1; - unsigned int FL_SFIFO_EMPTYB : 1; - unsigned int : 1; - unsigned int FA_PFIFO_EMPTYB : 1; - unsigned int FA_LFIFO_EMPTYB : 1; - unsigned int FA_SFIFO_EMPTYB : 1; - unsigned int : 1; - unsigned int IN_BUSY : 1; - unsigned int FG_BUSY : 1; - unsigned int LA_BUSY : 1; 
- unsigned int FL_BUSY : 1; - unsigned int TA_BUSY : 1; - unsigned int FA_BUSY : 1; - unsigned int AL_BUSY : 1; - unsigned int BUSY : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union TCA_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCA_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int 
PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE2 : 4; - unsigned int PERF_MODE3 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCC_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - 
unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
TCP_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 10; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER_FILTER { - struct { - unsigned int BUFFER : 1; - unsigned int FLAT : 1; - unsigned int DIM : 3; - unsigned int : 27; - } bits, bitfields; - struct { - unsigned int : 5; - unsigned int DATA_FORMAT : 6; - unsigned int NUM_FORMAT : 4; - unsigned int SW_MODE : 5; - unsigned int NUM_SAMPLES : 2; - unsigned int OPCODE_TYPE : 3; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int COMPRESSION_ENABLE : 1; - unsigned int ADDR_MODE : 3; - unsigned int : 1; - } gfx09; - struct { - unsigned int : 5; - unsigned int DATA_FORMAT : 7; - unsigned int : 1; - unsigned int NUM_FORMAT : 4; - unsigned int SW_MODE : 5; - unsigned int NUM_SAMPLES : 2; - unsigned int OPCODE_TYPE : 3; - unsigned int SLC : 1; - unsigned int DLC : 1; - unsigned int GLC : 1; - unsigned int COMPRESSION_ENABLE : 1; - 
unsigned int : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER_FILTER2 { - struct { - unsigned int REQ_MODE : 3; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TCP_PERFCOUNTER_FILTER_EN { - struct { - unsigned int BUFFER : 1; - unsigned int FLAT : 1; - unsigned int DIM : 1; - unsigned int DATA_FORMAT : 1; - unsigned int NUM_FORMAT : 1; - unsigned int SW_MODE : 1; - unsigned int NUM_SAMPLES : 1; - unsigned int OPCODE_TYPE : 1; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int COMPRESSION_ENABLE : 1; - unsigned int ADDR_MODE : 1; - unsigned int : 20; - } gfx09; - struct { - unsigned int : 8; - unsigned int SLC : 1; - unsigned int DLC : 1; - unsigned int GLC : 1; - unsigned int COMPRESSION_ENABLE : 1; - unsigned int REQ_MODE : 1; - unsigned int : 19; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER0_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 8; - unsigned int : 2; - unsigned int PERF_SEL1 : 8; - unsigned int : 14; - } most; - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int : 12; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER0_SELECT1 { - struct { - unsigned int : 24; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, 
bitfields; - struct { - unsigned int PERF_SEL2 : 8; - unsigned int : 2; - unsigned int PERF_SEL3 : 8; - unsigned int : 14; - } most; - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 12; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union TD_PERFCOUNTER1_SELECT { - struct { - unsigned int : 20; - unsigned int CNTR_MODE : 4; - unsigned int : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - struct { - unsigned int PERF_SEL : 8; - unsigned int : 24; - } most; - struct { - unsigned int : 10; - unsigned int PERF_SEL1 : 8; - unsigned int : 6; - unsigned int PERF_MODE1 : 4; - unsigned int : 4; - } gfx09_0; - struct { - unsigned int PERF_SEL : 10; - unsigned int : 22; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtl3 { - struct { - unsigned 
int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr2_Lo { - struct { - unsigned int 
Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH0_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int 
ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH1_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int 
VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH2_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH2_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; 
- -union UMCCH3_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union UMCCH3_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH3_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int 
RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr5_Hi { - struct { - 
unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH4_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int 
ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow 
: 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH5_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 
2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH6_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union UMCCH7_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union UMCCH7_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH7_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int 
RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr1_Lo { - struct { - 
unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH8_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int 
ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned 
int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH9_PerfMonCtrClk_Lo { - 
struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } 
most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH10_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH11_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; 
- signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH11_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int 
RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union UMCCH12_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH12_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - 
unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH13_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH13_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - 
unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr2_Hi { - 
struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH14_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 
1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - 
unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH15_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union UMCCH16_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float 
f32All; -}; - -union UMCCH16_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH16_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH16_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int 
SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - 
unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH17_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 
2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH18_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtl2 { - struct { - unsigned 
int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH19_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH20_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int 
ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH21_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH21_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; 
- unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH22_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH22_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; 
- float f32All; -}; - -union UMCCH23_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - 
unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH23_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH23_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned 
int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - 
unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH24_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask 
: 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH25_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH26_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; 
- - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH26_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int 
Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; 
-}; - -union UMCCH27_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int 
u32All; - signed int i32All; - float f32All; -}; - -union UMCCH27_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int 
ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH28_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH28_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtl4 { - struct { - unsigned int 
EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
UMCCH29_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH29_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; 
- unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH30_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtl1 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - 
unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtl2 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtl3 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtl4 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtl5 { - struct { - unsigned int EventSelect : 8; - unsigned int RdWrMask : 2; - unsigned int PriorityMask : 4; - unsigned int ReqSizeMask : 2; - unsigned int ChipSelMask : 4; - unsigned int ChipIDSel : 4; - unsigned int VCSel : 5; - unsigned int SubChanMask : 2; - unsigned int Enable : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtlClk { - struct { - unsigned int GlblResetMsk : 6; - unsigned int : 18; - unsigned int GlblReset : 1; - unsigned int GlblMonEn : 1; - unsigned int : 5; - unsigned int CtrClkEn : 1; - } bits, 
bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr1_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr1_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr2_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr2_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr3_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr3_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr4_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr4_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr5_Hi { - struct { - unsigned int Data : 16; - unsigned int Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtr5_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtrClk_Hi { - struct { - unsigned int Data : 16; - unsigned int 
Overflow : 1; - unsigned int : 15; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UMCCH31_PerfMonCtrClk_Lo { - struct { - unsigned int Data : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int COUNTER_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union UTCL1_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int COUNTER_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union UTCL1_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union UTCL1_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union 
UTCL1_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int COUNTER_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union UTCL1_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union UTCL1_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -union UTCL1_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int : 18; - unsigned int COUNTER_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; -#endif - -union VGT_CACHE_INVALIDATION { - struct { - unsigned int CACHE_INVALIDATION : 2; - unsigned int : 2; - unsigned int DIS_INSTANCING_OPT : 1; - unsigned int VS_NO_EXTRA_BUFFER : 1; - unsigned int AUTO_INVLD_EN : 2; - unsigned int : 1; - unsigned int USE_GS_DONE : 1; - unsigned int : 1; - unsigned int DIS_RANGE_FULL_INVLD : 1; - unsigned int GS_LATE_ALLOC_EN : 1; - unsigned int STREAMOUT_FULL_FLUSH : 1; - unsigned int : 2; - unsigned int ES_LIMIT : 5; - unsigned int ENABLE_PING_PONG : 1; - unsigned int OPT_FLOW_CNTL_1 : 3; - unsigned int OPT_FLOW_CNTL_2 : 3; - unsigned int EN_WAVE_MERGE : 1; - unsigned int ENABLE_PING_PONG_EOI : 1; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_CNTL_STATUS { - struct { - unsigned int VGT_BUSY : 1; - unsigned int VGT_OUT_INDX_BUSY : 1; - unsigned int VGT_OUT_BUSY : 1; - unsigned int VGT_PT_BUSY : 1; - unsigned int VGT_TE_BUSY : 1; - unsigned int 
VGT_VR_BUSY : 1; - unsigned int VGT_PI_BUSY : 1; - unsigned int VGT_GS_BUSY : 1; - unsigned int VGT_HS_BUSY : 1; - unsigned int VGT_TE11_BUSY : 1; - unsigned int VGT_PRIMGEN_BUSY : 1; - unsigned int : 21; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DISPATCH_DRAW_INDEX { - struct { - unsigned int MATCH_INDEX : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_BASE { - struct { - unsigned int BASE_ADDR : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_BASE_HI { - struct { - unsigned int BASE_ADDR : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_CONTROL { - struct { - unsigned int PRIMGROUP_SIZE : 16; - unsigned int : 1; - unsigned int IA_SWITCH_ON_EOP : 1; - unsigned int : 1; - unsigned int SWITCH_ON_EOI : 1; - unsigned int WD_SWITCH_ON_EOP : 1; - unsigned int : 11; - } bits, bitfields; - struct { - unsigned int : 21; - unsigned int EN_INST_OPT_BASIC : 1; - unsigned int EN_INST_OPT_ADV : 1; - unsigned int HW_USE_ONLY : 1; - unsigned int : 8; - } gfx09; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_DATA_FIFO_DEPTH { - struct { - unsigned int DMA_DATA_FIFO_DEPTH : 9; - unsigned int DMA2DRAW_FIFO_DEPTH : 10; - unsigned int : 13; - } gfx09; - struct { - unsigned int DMA_DATA_FIFO_DEPTH : 10; - unsigned int : 22; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_EVENT_INITIATOR { - struct { - unsigned int EVENT_TYPE : 6; - unsigned int : 4; - unsigned int ADDRESS_HI : 17; - unsigned int EXTENDED_EVENT : 1; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_INDEX_TYPE { - struct { - unsigned int INDEX_TYPE : 2; - unsigned int SWAP_MODE : 2; - unsigned int BUF_TYPE : 2; - unsigned int : 3; 
- unsigned int NOT_EOP : 1; - unsigned int REQ_PATH : 1; - unsigned int : 21; - } bits, bitfields; - struct { - unsigned int : 6; - unsigned int RDREQ_POLICY : 1; - unsigned int : 1; - unsigned int PRIMGEN_EN : 1; - unsigned int : 23; - } gfx09; - struct { - unsigned int : 14; - unsigned int DISABLE_INSTANCE_PACKING : 1; - unsigned int : 17; - } gfx103Plus; - struct { - unsigned int : 6; - unsigned int RDREQ_POLICY : 2; - unsigned int ATC : 1; - unsigned int : 2; - unsigned int MTYPE : 3; - unsigned int : 18; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_LS_HS_CONFIG { - struct { - unsigned int : 8; - unsigned int HS_NUM_INPUT_CP : 6; - unsigned int : 18; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_MAX_SIZE { - struct { - unsigned int MAX_SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_NUM_INSTANCES { - struct { - unsigned int NUM_INSTANCES : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_PRIMITIVE_TYPE { - struct { - unsigned int PRIM_TYPE : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_REQ_FIFO_DEPTH { - struct { - unsigned int DMA_REQ_FIFO_DEPTH : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DMA_SIZE { - struct { - unsigned int NUM_INDICES : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DRAW_INITIATOR { - struct { - unsigned int SOURCE_SELECT : 2; - unsigned int MAJOR_MODE : 2; - unsigned int SPRITE_EN_R6XX : 1; - unsigned int NOT_EOP : 1; - unsigned int USE_OPAQUE : 1; - unsigned int : 22; - unsigned int REG_RT_INDEX : 3; - } bits, bitfields; - struct { - unsigned int : 7; - unsigned int UNROLLED_INST : 1; - 
unsigned int GRBM_SKEW_NO_DEC : 1; - unsigned int : 23; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DRAW_INIT_FIFO_DEPTH { - struct { - unsigned int DRAW_INIT_FIFO_DEPTH : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_DRAW_PAYLOAD_CNTL { - struct { - unsigned int : 1; - unsigned int EN_REG_RT_INDEX : 1; - unsigned int : 30; - } bits, bitfields; - struct { - unsigned int : 1; - unsigned int : 31; - } most; - struct { - unsigned int : 2; - unsigned int : 2; - unsigned int : 28; - } gfx09; - struct { - unsigned int : 2; - unsigned int : 1; - unsigned int : 29; - } gfx101; - struct { - unsigned int : 6; - unsigned int EN_VRS_RATE : 1; - unsigned int : 25; - } gfx103Plus; - struct { - unsigned int : 3; - unsigned int EN_PRIM_PAYLOAD : 1; - unsigned int EN_DRAW_VP : 1; - unsigned int : 27; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_ENHANCE { - struct { - unsigned int MISC : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_ESGS_RING_ITEMSIZE { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_ESGS_RING_SIZE { - struct { - unsigned int MEM_SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_ESGS_RING_SIZE_UMD { - struct { - unsigned int MEM_SIZE : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_ES_PER_GS { - struct { - unsigned int ES_PER_GS : 11; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_EVENT_ADDRESS_REG { - struct { - unsigned int ADDRESS_LOW : 28; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union VGT_EVENT_INITIATOR { - struct { - unsigned int EVENT_TYPE : 6; - unsigned int : 4; - unsigned int ADDRESS_HI : 17; - unsigned int EXTENDED_EVENT : 1; - unsigned int : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_FIFO_DEPTHS { - struct { - unsigned int VS_DEALLOC_TBL_DEPTH : 7; - unsigned int RESERVED_0 : 1; - unsigned int CLIPP_FIFO_DEPTH : 14; - unsigned int : 10; - } bits, bitfields; - struct { - unsigned int : 22; - unsigned int HSINPUT_FIFO_DEPTH : 6; - unsigned int : 4; - } gfx09; - struct { - unsigned int : 22; - unsigned int RESERVED_1 : 1; - unsigned int HSINPUT_FIFO_DEPTH : 6; - unsigned int : 3; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_DECR { - struct { - unsigned int DECR : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_FIRST_DECR { - struct { - unsigned int FIRST_DECR : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_PRIM_TYPE { - struct { - unsigned int PRIM_TYPE : 5; - unsigned int : 9; - unsigned int RETAIN_ORDER : 1; - unsigned int RETAIN_QUADS : 1; - unsigned int PRIM_ORDER : 3; - unsigned int : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_VECT_0_CNTL { - struct { - unsigned int COMP_X_EN : 1; - unsigned int COMP_Y_EN : 1; - unsigned int COMP_Z_EN : 1; - unsigned int COMP_W_EN : 1; - unsigned int : 4; - unsigned int STRIDE : 8; - unsigned int SHIFT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_VECT_0_FMT_CNTL { - struct { - unsigned int X_CONV : 4; - unsigned int X_OFFSET : 4; - unsigned int Y_CONV : 4; - unsigned int Y_OFFSET : 4; - unsigned int Z_CONV : 4; - unsigned int Z_OFFSET : 4; - unsigned int W_CONV : 4; - 
unsigned int W_OFFSET : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_VECT_1_CNTL { - struct { - unsigned int COMP_X_EN : 1; - unsigned int COMP_Y_EN : 1; - unsigned int COMP_Z_EN : 1; - unsigned int COMP_W_EN : 1; - unsigned int : 4; - unsigned int STRIDE : 8; - unsigned int SHIFT : 8; - unsigned int : 8; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GROUP_VECT_1_FMT_CNTL { - struct { - unsigned int X_CONV : 4; - unsigned int X_OFFSET : 4; - unsigned int Y_CONV : 4; - unsigned int Y_OFFSET : 4; - unsigned int Z_CONV : 4; - unsigned int Z_OFFSET : 4; - unsigned int W_CONV : 4; - unsigned int W_OFFSET : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_ITEMSIZE { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_OFFSET_1 { - struct { - unsigned int OFFSET : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_OFFSET_2 { - struct { - unsigned int OFFSET : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_OFFSET_3 { - struct { - unsigned int OFFSET : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_SIZE { - struct { - unsigned int MEM_SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GSVS_RING_SIZE_UMD { - struct { - unsigned int MEM_SIZE : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_INSTANCE_CNT { - struct { - unsigned int ENABLE : 1; - unsigned int : 1; - unsigned int CNT : 7; - unsigned int : 23; - } bits, 
bitfields; - struct { - unsigned int : 31; - unsigned int EN_MAX_VERT_OUT_PER_GS_INSTANCE : 1; - } gfx10Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_MAX_PRIMS_PER_SUBGROUP { - struct { - unsigned int MAX_PRIMS_PER_SUBGROUP : 16; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_MAX_VERT_OUT { - struct { - unsigned int MAX_VERT_OUT : 11; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_MAX_WAVE_ID { - struct { - unsigned int MAX_WAVE_ID : 12; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_MODE { - struct { - unsigned int MODE : 3; - unsigned int RESERVED_0 : 1; - unsigned int CUT_MODE : 2; - unsigned int RESERVED_1 : 5; - unsigned int GS_C_PACK_EN : 1; - unsigned int RESERVED_2 : 1; - unsigned int ES_PASSTHRU : 1; - unsigned int : 3; - unsigned int PARTIAL_THD_AT_EOI : 1; - unsigned int SUPPRESS_CUTS : 1; - unsigned int ES_WRITE_OPTIMIZE : 1; - unsigned int GS_WRITE_OPTIMIZE : 1; - unsigned int ONCHIP : 2; - unsigned int : 9; - } bits, bitfields; - struct { - unsigned int : 14; - unsigned int RESERVED_3 : 1; - unsigned int RESERVED_4 : 1; - unsigned int RESERVED_5 : 1; - unsigned int : 15; - } gfx09; - struct { - unsigned int : 14; - unsigned int COMPUTE_MODE : 1; - unsigned int FAST_COMPUTE_MODE : 1; - unsigned int ELEMENT_INFO_EN : 1; - unsigned int : 15; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_ONCHIP_CNTL { - struct { - unsigned int ES_VERTS_PER_SUBGRP : 11; - unsigned int GS_PRIMS_PER_SUBGRP : 11; - unsigned int GS_INST_PRIMS_IN_SUBGRP : 10; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_OUT_PRIM_TYPE { - struct { - unsigned int OUTPRIM_TYPE : 6; - unsigned int : 26; - } bits, bitfields; - 
struct { - unsigned int : 8; - unsigned int OUTPRIM_TYPE_1 : 6; - unsigned int : 2; - unsigned int OUTPRIM_TYPE_2 : 6; - unsigned int OUTPRIM_TYPE_3 : 6; - unsigned int : 3; - unsigned int UNIQUE_TYPE_PER_STREAM : 1; - } gfx09_10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_PER_ES { - struct { - unsigned int GS_PER_ES : 11; - unsigned int : 21; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_PER_VS { - struct { - unsigned int GS_PER_VS : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_VERTEX_REUSE { - struct { - unsigned int VERT_REUSE : 5; - unsigned int : 27; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_VERT_ITEMSIZE { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_VERT_ITEMSIZE_1 { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_VERT_ITEMSIZE_2 { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_GS_VERT_ITEMSIZE_3 { - struct { - unsigned int ITEMSIZE : 15; - unsigned int : 17; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HOS_CNTL { - struct { - unsigned int TESS_MODE : 2; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HOS_MAX_TESS_LEVEL { - struct { - unsigned int MAX_TESS : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HOS_MIN_TESS_LEVEL { - struct { - unsigned int MIN_TESS : 32; - } bits, bitfields; - - 
unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HOS_REUSE_DEPTH { - struct { - unsigned int REUSE_DEPTH : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HS_OFFCHIP_PARAM { - struct { - unsigned int OFFCHIP_BUFFERING : 9; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 21; - } most; - struct { - unsigned int OFFCHIP_BUFFERING : 10; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 20; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_HS_OFFCHIP_PARAM_UMD { - struct { - unsigned int OFFCHIP_BUFFERING : 9; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 21; - } most; -#if CHIP_HDR_NAVI21 - struct { - unsigned int OFFCHIP_BUFFERING : 10; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 20; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int OFFCHIP_BUFFERING : 10; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 20; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int OFFCHIP_BUFFERING : 10; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 20; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int OFFCHIP_BUFFERING : 10; - unsigned int OFFCHIP_GRANULARITY : 2; - unsigned int : 20; - } nv24; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_IMMED_DATA { - struct { - unsigned int DATA : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_INDEX_TYPE { - struct { - unsigned int INDEX_TYPE : 2; - unsigned int : 30; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int PRIMGEN_EN : 1; - unsigned int : 23; - } gfx09; - struct { - unsigned int : 14; - unsigned int DISABLE_INSTANCE_PACKING : 1; - unsigned int : 17; - } gfx103Plus; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_INDX_OFFSET { - struct { - 
unsigned int INDX_OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_INSTANCE_BASE_ID { - struct { - unsigned int INSTANCE_BASE_ID : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_INSTANCE_STEP_RATE_0 { - struct { - unsigned int STEP_RATE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_INSTANCE_STEP_RATE_1 { - struct { - unsigned int STEP_RATE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_LAST_COPY_STATE { - struct { - unsigned int SRC_STATE_ID : 3; - unsigned int : 13; - unsigned int DST_STATE_ID : 3; - unsigned int : 13; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_LS_HS_CONFIG { - struct { - unsigned int NUM_PATCHES : 8; - unsigned int HS_NUM_INPUT_CP : 6; - unsigned int HS_NUM_OUTPUT_CP : 6; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_MAX_VTX_INDX { - struct { - unsigned int MAX_INDX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_MC_LAT_CNTL { - struct { - unsigned int MC_TIME_STAMP_RES : 4; - unsigned int : 28; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_MIN_VTX_INDX { - struct { - unsigned int MIN_INDX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_MULTI_PRIM_IB_RESET_EN { - struct { - unsigned int RESET_EN : 1; - unsigned int MATCH_ALL_BITS : 1; - unsigned int : 30; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_MULTI_PRIM_IB_RESET_INDX { - struct { - unsigned int RESET_INDX : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union VGT_NUM_INDICES { - struct { - unsigned int NUM_INDICES : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_NUM_INSTANCES { - struct { - unsigned int NUM_INSTANCES : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_OUTPUT_PATH_CNTL { - struct { - unsigned int PATH_SELECT : 3; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_OUT_DEALLOC_CNTL { - struct { - unsigned int DEALLOC_DIST : 7; - unsigned int : 25; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER0_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 10; - unsigned int 
PERF_SEL1 : 10; - unsigned int CNTR_MODE : 4; - unsigned int PERF_MODE1 : 4; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER1_SELECT1 { - struct { - unsigned int PERF_SEL2 : 10; - unsigned int PERF_SEL3 : 10; - unsigned int : 4; - unsigned int PERF_MODE3 : 4; - unsigned int PERF_MODE2 : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PERFCOUNTER_SEID_MASK { - struct { - unsigned int PERF_SEID_IGNORE_MASK : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PRIMITIVEID_EN { - struct { - unsigned int PRIMITIVEID_EN : 1; - unsigned int DISABLE_RESET_ON_EOI : 1; - unsigned int NGG_DISABLE_PROVOK_REUSE : 1; - unsigned int : 29; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union VGT_PRIMITIVEID_RESET { - struct { - unsigned int VALUE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_PRIMITIVE_TYPE { - struct { - unsigned int PRIM_TYPE : 6; - unsigned int : 26; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_REUSE_OFF { - struct { - unsigned int REUSE_OFF : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_SHADER_STAGES_EN { - struct { - unsigned int LS_EN : 2; - unsigned int HS_EN : 1; - unsigned int ES_EN : 2; - unsigned int GS_EN : 1; - unsigned int VS_EN : 2; - unsigned int : 4; - unsigned int VS_WAVE_ID_EN : 1; - unsigned int PRIMGEN_EN : 1; - unsigned int ORDERED_ID_MODE : 1; - unsigned int MAX_PRIMGRP_IN_WAVE : 4; - unsigned int : 13; - } bits, bitfields; - struct { - unsigned int : 9; - unsigned int DISPATCH_DRAW_EN : 1; - unsigned int DIS_DEALLOC_ACCUM_0 : 1; - unsigned int DIS_DEALLOC_ACCUM_1 : 1; - unsigned int : 20; - } most; - struct { - unsigned int : 19; - unsigned int GS_FAST_LAUNCH : 1; - unsigned int : 12; - } gfx09_0; - struct { - unsigned int : 19; - unsigned int GS_FAST_LAUNCH : 2; - unsigned int : 11; - } gfx09_1xPlus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 26; - unsigned int PRIMGEN_PASSTHRU_NO_MSG : 1; - unsigned int : 5; - } gfx104Plus; -#endif - struct { - unsigned int : 8; - unsigned int DYNAMIC_HS : 1; - unsigned int : 12; - unsigned int HS_W32_EN : 1; - unsigned int GS_W32_EN : 1; - unsigned int VS_W32_EN : 1; - unsigned int NGG_WAVE_ID_EN : 1; - unsigned int PRIMGEN_PASSTHRU_EN : 1; - unsigned int : 6; - } gfx10Plus; -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 26; - unsigned int PRIMGEN_PASSTHRU_NO_MSG : 1; - unsigned int : 5; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 26; - unsigned int 
PRIMGEN_PASSTHRU_NO_MSG : 1; - unsigned int : 5; - } nv24; -#endif - struct { - unsigned int : 26; - unsigned int PRIMGEN_PASSTHRU_NO_MSG : 1; - unsigned int : 5; - } raphael; - struct { - unsigned int : 26; - unsigned int PRIMGEN_PASSTHRU_NO_MSG : 1; - unsigned int : 5; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_CONFIG { - struct { - unsigned int STREAM_0_BUFFER_EN : 4; - unsigned int STREAM_1_BUFFER_EN : 4; - unsigned int STREAM_2_BUFFER_EN : 4; - unsigned int STREAM_3_BUFFER_EN : 4; - unsigned int : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_FILLED_SIZE_0 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_FILLED_SIZE_1 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_FILLED_SIZE_2 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_FILLED_SIZE_3 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_OFFSET_0 { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_OFFSET_1 { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_OFFSET_2 { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_OFFSET_3 { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - 
-union VGT_STRMOUT_BUFFER_SIZE_0 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_SIZE_1 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_SIZE_2 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_BUFFER_SIZE_3 { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_CONFIG { - struct { - unsigned int STREAMOUT_0_EN : 1; - unsigned int STREAMOUT_1_EN : 1; - unsigned int STREAMOUT_2_EN : 1; - unsigned int STREAMOUT_3_EN : 1; - unsigned int RAST_STREAM : 3; - unsigned int EN_PRIMS_NEEDED_CNT : 1; - unsigned int RAST_STREAM_MASK : 4; - unsigned int : 19; - unsigned int USE_RAST_STREAM_MASK : 1; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_DELAY { - struct { - unsigned int SKIP_DELAY : 8; - unsigned int SE0_WD_DELAY : 3; - unsigned int SE1_WD_DELAY : 3; - unsigned int SE2_WD_DELAY : 3; - unsigned int SE3_WD_DELAY : 3; - unsigned int : 12; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE { - struct { - unsigned int SIZE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_DRAW_OPAQUE_OFFSET { - struct { - unsigned int OFFSET : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE { - struct { - unsigned int VERTEX_STRIDE : 9; - unsigned int : 23; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_VTX_STRIDE_0 { - struct { - unsigned 
int STRIDE : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_VTX_STRIDE_1 { - struct { - unsigned int STRIDE : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_VTX_STRIDE_2 { - struct { - unsigned int STRIDE : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_STRMOUT_VTX_STRIDE_3 { - struct { - unsigned int STRIDE : 10; - unsigned int : 22; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_SYS_CONFIG { - struct { - unsigned int DUAL_CORE_EN : 1; - unsigned int MAX_LS_HS_THDGRP : 6; - unsigned int ADC_EVENT_FILTER_DISABLE : 1; - unsigned int : 24; - } bits, bitfields; - struct { - unsigned int : 8; - unsigned int NUM_SUBGROUPS_IN_FLIGHT : 11; - unsigned int : 13; - } gfx103PlusExclusive; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TESS_DISTRIBUTION { - struct { - unsigned int ACCUM_ISOLINE : 8; - unsigned int ACCUM_TRI : 8; - unsigned int ACCUM_QUAD : 8; - unsigned int DONUT_SPLIT : 5; - unsigned int TRAP_SPLIT : 3; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_MEMORY_BASE { - struct { - unsigned int BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_MEMORY_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_MEMORY_BASE_HI_UMD { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_MEMORY_BASE_UMD { - struct { - unsigned int BASE : 32; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; 
- -union VGT_TF_PARAM { - struct { - unsigned int TYPE : 2; - unsigned int PARTITIONING : 3; - unsigned int TOPOLOGY : 3; - unsigned int RESERVED_REDUC_AXIS : 1; - unsigned int : 5; - unsigned int DISABLE_DONUTS : 1; - unsigned int : 2; - unsigned int DISTRIBUTION_MODE : 2; - unsigned int : 13; - } bits, bitfields; - struct { - unsigned int : 15; - unsigned int RDREQ_POLICY : 1; - unsigned int : 16; - } gfx09; - struct { - unsigned int : 9; - unsigned int DEPRECATED : 1; - unsigned int : 22; - } gfx09_10; - struct { - unsigned int : 10; - unsigned int NUM_DS_WAVES_PER_SIMD : 4; - unsigned int : 1; - unsigned int RDREQ_POLICY : 2; - unsigned int : 2; - unsigned int DETECT_ONE : 1; - unsigned int DETECT_ZERO : 1; - unsigned int : 2; - unsigned int MTYPE : 3; - unsigned int : 6; - } gfx10Plus; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int : 9; - unsigned int NOT_USED : 1; - unsigned int : 22; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_RING_SIZE { - struct { - unsigned int SIZE : 16; - unsigned int : 16; - } gfx09_10; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int SIZE : 17; - unsigned int : 15; - } gfx11; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_TF_RING_SIZE_UMD { - struct { - unsigned int SIZE : 16; - unsigned int : 16; - } most; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_VERTEX_REUSE_BLOCK_CNTL { - struct { - unsigned int VTX_REUSE_DEPTH : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_VS_MAX_WAVE_ID { - struct { - unsigned int MAX_WAVE_ID : 12; - unsigned int : 20; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_VTX_CNT_EN { - struct { - unsigned int VTX_CNT_EN : 1; - unsigned int : 31; - } 
bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union VGT_VTX_VECT_EJECT_REG { - struct { - unsigned int PRIM_COUNT : 7; - unsigned int : 25; - } gfx09; - struct { - unsigned int PRIM_COUNT : 10; - unsigned int : 22; - } gfx10; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_BUF_RESOURCE_1 { - struct { - unsigned int POS_BUF_SIZE : 16; - unsigned int INDEX_BUF_SIZE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_BUF_RESOURCE_2 { - struct { - unsigned int PARAM_BUF_SIZE : 13; - unsigned int : 2; - unsigned int ADDR_MODE : 1; - unsigned int CNTL_SB_BUF_SIZE : 16; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_CNTL_SB_BUF_BASE { - struct { - unsigned int BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_CNTL_SB_BUF_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_CNTL_STATUS { - struct { - unsigned int WD_BUSY : 1; - unsigned int WD_SPL_DMA_BUSY : 1; - unsigned int WD_SPL_DI_BUSY : 1; - unsigned int WD_ADC_BUSY : 1; - unsigned int : 28; - } most; - struct { - unsigned int VR3_BUSY : 1; - unsigned int VR2_BUSY : 1; - unsigned int VR1_BUSY : 1; - unsigned int VR0_BUSY : 1; - unsigned int HS3_BUSY : 1; - unsigned int HS2_BUSY : 1; - unsigned int HS1_BUSY : 1; - unsigned int HS0_BUSY : 1; - unsigned int GS3_BUSY : 1; - unsigned int GS2_BUSY : 1; - unsigned int GS1_BUSY : 1; - unsigned int GS0_BUSY : 1; - unsigned int NGG3_BUSY : 1; - unsigned int NGG2_BUSY : 1; - unsigned int NGG1_BUSY : 1; - unsigned int NGG0_BUSY : 1; - unsigned int DIST_BUSY : 1; - unsigned int DIST_BE_BUSY : 1; - unsigned int : 6; - unsigned int TE3_BUSY : 1; - unsigned int TE2_BUSY : 1; - unsigned int TE1_BUSY : 1; - unsigned int TE0_BUSY : 1; - 
unsigned int : 4; - } gfx103Derivative; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 - struct { - unsigned int DIST_BUSY : 1; - unsigned int DIST_BE_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 26; - } gfx11; -#endif -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 18; - unsigned int WD_TE11_BUSY : 1; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int : 4; - unsigned int WLC_BUSY : 1; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int : 2; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 18; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int : 4; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 2; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 18; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int : 4; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 2; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 18; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int : 4; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 2; - } nv24; 
-#endif - struct { - unsigned int : 18; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int : 4; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 2; - } raphael; - struct { - unsigned int : 18; - unsigned int SA3_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA2_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA1_OUTPUT_BLOCK_BUSY : 1; - unsigned int SA0_OUTPUT_BLOCK_BUSY : 1; - unsigned int GE_UTCL1_BUSY : 1; - unsigned int WD_TE11_BUSY : 1; - unsigned int : 4; - unsigned int PC_MANAGER_BUSY : 1; - unsigned int WLC_BUSY : 1; - unsigned int : 2; - } rembrandt; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_ENHANCE { - struct { - unsigned int MISC : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_INDEX_BUF_BASE { - struct { - unsigned int BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_INDEX_BUF_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER0_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER0_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER0_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER1_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int 
i32All; - float f32All; -}; - -union WD_PERFCOUNTER1_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER1_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER2_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER2_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER2_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER3_HI { - struct { - unsigned int PERFCOUNTER_HI : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER3_LO { - struct { - unsigned int PERFCOUNTER_LO : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_PERFCOUNTER3_SELECT { - struct { - unsigned int PERF_SEL : 8; - unsigned int : 20; - unsigned int PERF_MODE : 4; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_POS_BUF_BASE { - struct { - unsigned int BASE : 32; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_POS_BUF_BASE_HI { - struct { - unsigned int BASE_HI : 8; - unsigned int : 24; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_QOS { - struct { - unsigned int DRAW_STALL : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union 
WD_UTCL1_CNTL { - struct { - unsigned int XNACK_REDO_TIMER_CNT : 20; - unsigned int : 3; - unsigned int VMID_RESET_MODE : 1; - unsigned int DROP_MODE : 1; - unsigned int BYPASS : 1; - unsigned int INVALIDATE : 1; - unsigned int FRAG_LIMIT_MODE : 1; - unsigned int FORCE_SNOOP : 1; - unsigned int : 3; - } bits, bitfields; - struct { - unsigned int : 29; - unsigned int FORCE_SD_VMID_DIRTY : 1; - unsigned int : 2; - } gfx09; - struct { - unsigned int : 29; - unsigned int MTYPE_OVERRIDE : 1; - unsigned int : 2; - } gfx10Plus; -#if CHIP_HDR_NAVI21 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv21; -#endif -#if CHIP_HDR_NAVI22 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv22; -#endif -#if CHIP_HDR_NAVI23 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv23; -#endif -#if CHIP_HDR_NAVI24 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv24; -#endif -#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 - struct { - unsigned int : 30; - unsigned int LLC_NOALLOC_OVERRIDE : 1; - unsigned int : 1; - } nv3x; -#endif - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union WD_UTCL1_STATUS { - struct { - unsigned int FAULT_DETECTED : 1; - unsigned int RETRY_DETECTED : 1; - unsigned int PRT_DETECTED : 1; - unsigned int : 5; - unsigned int FAULT_UTCL1ID : 6; - unsigned int : 2; - unsigned int RETRY_UTCL1ID : 6; - unsigned int : 2; - unsigned int PRT_UTCL1ID : 6; - unsigned int : 2; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -union XDMA_SLV_FLIP_PENDING { - struct { - unsigned int XDMA_SLV_FLIP_PENDING : 1; - unsigned int : 31; - } bits, bitfields; - - unsigned int u32All; - signed int i32All; - float f32All; -}; - -} // inline namespace Chip -} // namespace Gfx9 -} // namespace Pal diff --git 
a/lgc/imported/chip/gfx9/gfx9_plus_merged_typedef.h b/lgc/imported/chip/gfx9/gfx9_plus_merged_typedef.h deleted file mode 100644 index 7dd2e4cebf..0000000000 --- a/lgc/imported/chip/gfx9/gfx9_plus_merged_typedef.h +++ /dev/null @@ -1,3303 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ - -#pragma once - -namespace Pal -{ -namespace Gfx9 -{ -inline namespace Chip -{ -typedef union ATC_L2_PERFCOUNTER0_CFG regATC_L2_PERFCOUNTER0_CFG; -typedef union ATC_L2_PERFCOUNTER1_CFG regATC_L2_PERFCOUNTER1_CFG; -typedef union ATC_L2_PERFCOUNTER_HI regATC_L2_PERFCOUNTER_HI; -typedef union ATC_L2_PERFCOUNTER_LO regATC_L2_PERFCOUNTER_LO; -typedef union ATC_L2_PERFCOUNTER_RSLT_CNTL regATC_L2_PERFCOUNTER_RSLT_CNTL; -typedef union ATC_PERFCOUNTER0_CFG regATC_PERFCOUNTER0_CFG; -typedef union ATC_PERFCOUNTER1_CFG regATC_PERFCOUNTER1_CFG; -typedef union ATC_PERFCOUNTER2_CFG regATC_PERFCOUNTER2_CFG; -typedef union ATC_PERFCOUNTER3_CFG regATC_PERFCOUNTER3_CFG; -typedef union ATC_PERFCOUNTER_HI regATC_PERFCOUNTER_HI; -typedef union ATC_PERFCOUNTER_LO regATC_PERFCOUNTER_LO; -typedef union ATC_PERFCOUNTER_RSLT_CNTL regATC_PERFCOUNTER_RSLT_CNTL; -typedef union CB_BLEND0_CONTROL regCB_BLEND0_CONTROL; -typedef union CB_BLEND1_CONTROL regCB_BLEND1_CONTROL; -typedef union CB_BLEND2_CONTROL regCB_BLEND2_CONTROL; -typedef union CB_BLEND3_CONTROL regCB_BLEND3_CONTROL; -typedef union CB_BLEND4_CONTROL regCB_BLEND4_CONTROL; -typedef union CB_BLEND5_CONTROL regCB_BLEND5_CONTROL; -typedef union CB_BLEND6_CONTROL regCB_BLEND6_CONTROL; -typedef union CB_BLEND7_CONTROL regCB_BLEND7_CONTROL; -typedef union CB_BLEND_ALPHA regCB_BLEND_ALPHA; -typedef union CB_BLEND_BLUE regCB_BLEND_BLUE; -typedef union CB_BLEND_GREEN regCB_BLEND_GREEN; -typedef union CB_BLEND_RED regCB_BLEND_RED; -typedef union CB_CACHE_EVICT_POINTS regCB_CACHE_EVICT_POINTS; -typedef union CB_CGTT_SCLK_CTRL regCB_CGTT_SCLK_CTRL; -typedef union CB_CGTT_SCLK_CTRL1 regCB_CGTT_SCLK_CTRL1; -typedef union CB_COLOR0_ATTRIB regCB_COLOR0_ATTRIB; -typedef union CB_COLOR0_ATTRIB2 regCB_COLOR0_ATTRIB2; -typedef union CB_COLOR0_ATTRIB3 regCB_COLOR0_ATTRIB3; -typedef union CB_COLOR0_BASE regCB_COLOR0_BASE; -typedef 
union CB_COLOR0_BASE_EXT regCB_COLOR0_BASE_EXT; -typedef union CB_COLOR0_CLEAR_WORD0 regCB_COLOR0_CLEAR_WORD0; -typedef union CB_COLOR0_CLEAR_WORD1 regCB_COLOR0_CLEAR_WORD1; -typedef union CB_COLOR0_CMASK regCB_COLOR0_CMASK; -typedef union CB_COLOR0_CMASK_BASE_EXT regCB_COLOR0_CMASK_BASE_EXT; -typedef union CB_COLOR0_CMASK_SLICE regCB_COLOR0_CMASK_SLICE; -typedef union CB_COLOR0_DCC_BASE regCB_COLOR0_DCC_BASE; -typedef union CB_COLOR0_DCC_BASE_EXT regCB_COLOR0_DCC_BASE_EXT; -typedef union CB_COLOR0_DCC_CONTROL regCB_COLOR0_DCC_CONTROL; -typedef union CB_COLOR0_FMASK regCB_COLOR0_FMASK; -typedef union CB_COLOR0_FMASK_BASE_EXT regCB_COLOR0_FMASK_BASE_EXT; -typedef union CB_COLOR0_FMASK_SLICE regCB_COLOR0_FMASK_SLICE; -typedef union CB_COLOR0_INFO regCB_COLOR0_INFO; -typedef union CB_COLOR0_PITCH regCB_COLOR0_PITCH; -typedef union CB_COLOR0_SLICE regCB_COLOR0_SLICE; -typedef union CB_COLOR0_VIEW regCB_COLOR0_VIEW; -typedef union CB_COLOR1_ATTRIB regCB_COLOR1_ATTRIB; -typedef union CB_COLOR1_ATTRIB2 regCB_COLOR1_ATTRIB2; -typedef union CB_COLOR1_ATTRIB3 regCB_COLOR1_ATTRIB3; -typedef union CB_COLOR1_BASE regCB_COLOR1_BASE; -typedef union CB_COLOR1_BASE_EXT regCB_COLOR1_BASE_EXT; -typedef union CB_COLOR1_CLEAR_WORD0 regCB_COLOR1_CLEAR_WORD0; -typedef union CB_COLOR1_CLEAR_WORD1 regCB_COLOR1_CLEAR_WORD1; -typedef union CB_COLOR1_CMASK regCB_COLOR1_CMASK; -typedef union CB_COLOR1_CMASK_BASE_EXT regCB_COLOR1_CMASK_BASE_EXT; -typedef union CB_COLOR1_CMASK_SLICE regCB_COLOR1_CMASK_SLICE; -typedef union CB_COLOR1_DCC_BASE regCB_COLOR1_DCC_BASE; -typedef union CB_COLOR1_DCC_BASE_EXT regCB_COLOR1_DCC_BASE_EXT; -typedef union CB_COLOR1_DCC_CONTROL regCB_COLOR1_DCC_CONTROL; -typedef union CB_COLOR1_FMASK regCB_COLOR1_FMASK; -typedef union CB_COLOR1_FMASK_BASE_EXT regCB_COLOR1_FMASK_BASE_EXT; -typedef union CB_COLOR1_FMASK_SLICE regCB_COLOR1_FMASK_SLICE; -typedef union CB_COLOR1_INFO regCB_COLOR1_INFO; -typedef union CB_COLOR1_PITCH regCB_COLOR1_PITCH; -typedef union 
CB_COLOR1_SLICE regCB_COLOR1_SLICE; -typedef union CB_COLOR1_VIEW regCB_COLOR1_VIEW; -typedef union CB_COLOR2_ATTRIB regCB_COLOR2_ATTRIB; -typedef union CB_COLOR2_ATTRIB2 regCB_COLOR2_ATTRIB2; -typedef union CB_COLOR2_ATTRIB3 regCB_COLOR2_ATTRIB3; -typedef union CB_COLOR2_BASE regCB_COLOR2_BASE; -typedef union CB_COLOR2_BASE_EXT regCB_COLOR2_BASE_EXT; -typedef union CB_COLOR2_CLEAR_WORD0 regCB_COLOR2_CLEAR_WORD0; -typedef union CB_COLOR2_CLEAR_WORD1 regCB_COLOR2_CLEAR_WORD1; -typedef union CB_COLOR2_CMASK regCB_COLOR2_CMASK; -typedef union CB_COLOR2_CMASK_BASE_EXT regCB_COLOR2_CMASK_BASE_EXT; -typedef union CB_COLOR2_CMASK_SLICE regCB_COLOR2_CMASK_SLICE; -typedef union CB_COLOR2_DCC_BASE regCB_COLOR2_DCC_BASE; -typedef union CB_COLOR2_DCC_BASE_EXT regCB_COLOR2_DCC_BASE_EXT; -typedef union CB_COLOR2_DCC_CONTROL regCB_COLOR2_DCC_CONTROL; -typedef union CB_COLOR2_FMASK regCB_COLOR2_FMASK; -typedef union CB_COLOR2_FMASK_BASE_EXT regCB_COLOR2_FMASK_BASE_EXT; -typedef union CB_COLOR2_FMASK_SLICE regCB_COLOR2_FMASK_SLICE; -typedef union CB_COLOR2_INFO regCB_COLOR2_INFO; -typedef union CB_COLOR2_PITCH regCB_COLOR2_PITCH; -typedef union CB_COLOR2_SLICE regCB_COLOR2_SLICE; -typedef union CB_COLOR2_VIEW regCB_COLOR2_VIEW; -typedef union CB_COLOR3_ATTRIB regCB_COLOR3_ATTRIB; -typedef union CB_COLOR3_ATTRIB2 regCB_COLOR3_ATTRIB2; -typedef union CB_COLOR3_ATTRIB3 regCB_COLOR3_ATTRIB3; -typedef union CB_COLOR3_BASE regCB_COLOR3_BASE; -typedef union CB_COLOR3_BASE_EXT regCB_COLOR3_BASE_EXT; -typedef union CB_COLOR3_CLEAR_WORD0 regCB_COLOR3_CLEAR_WORD0; -typedef union CB_COLOR3_CLEAR_WORD1 regCB_COLOR3_CLEAR_WORD1; -typedef union CB_COLOR3_CMASK regCB_COLOR3_CMASK; -typedef union CB_COLOR3_CMASK_BASE_EXT regCB_COLOR3_CMASK_BASE_EXT; -typedef union CB_COLOR3_CMASK_SLICE regCB_COLOR3_CMASK_SLICE; -typedef union CB_COLOR3_DCC_BASE regCB_COLOR3_DCC_BASE; -typedef union CB_COLOR3_DCC_BASE_EXT regCB_COLOR3_DCC_BASE_EXT; -typedef union CB_COLOR3_DCC_CONTROL regCB_COLOR3_DCC_CONTROL; 
-typedef union CB_COLOR3_FMASK regCB_COLOR3_FMASK; -typedef union CB_COLOR3_FMASK_BASE_EXT regCB_COLOR3_FMASK_BASE_EXT; -typedef union CB_COLOR3_FMASK_SLICE regCB_COLOR3_FMASK_SLICE; -typedef union CB_COLOR3_INFO regCB_COLOR3_INFO; -typedef union CB_COLOR3_PITCH regCB_COLOR3_PITCH; -typedef union CB_COLOR3_SLICE regCB_COLOR3_SLICE; -typedef union CB_COLOR3_VIEW regCB_COLOR3_VIEW; -typedef union CB_COLOR4_ATTRIB regCB_COLOR4_ATTRIB; -typedef union CB_COLOR4_ATTRIB2 regCB_COLOR4_ATTRIB2; -typedef union CB_COLOR4_ATTRIB3 regCB_COLOR4_ATTRIB3; -typedef union CB_COLOR4_BASE regCB_COLOR4_BASE; -typedef union CB_COLOR4_BASE_EXT regCB_COLOR4_BASE_EXT; -typedef union CB_COLOR4_CLEAR_WORD0 regCB_COLOR4_CLEAR_WORD0; -typedef union CB_COLOR4_CLEAR_WORD1 regCB_COLOR4_CLEAR_WORD1; -typedef union CB_COLOR4_CMASK regCB_COLOR4_CMASK; -typedef union CB_COLOR4_CMASK_BASE_EXT regCB_COLOR4_CMASK_BASE_EXT; -typedef union CB_COLOR4_CMASK_SLICE regCB_COLOR4_CMASK_SLICE; -typedef union CB_COLOR4_DCC_BASE regCB_COLOR4_DCC_BASE; -typedef union CB_COLOR4_DCC_BASE_EXT regCB_COLOR4_DCC_BASE_EXT; -typedef union CB_COLOR4_DCC_CONTROL regCB_COLOR4_DCC_CONTROL; -typedef union CB_COLOR4_FMASK regCB_COLOR4_FMASK; -typedef union CB_COLOR4_FMASK_BASE_EXT regCB_COLOR4_FMASK_BASE_EXT; -typedef union CB_COLOR4_FMASK_SLICE regCB_COLOR4_FMASK_SLICE; -typedef union CB_COLOR4_INFO regCB_COLOR4_INFO; -typedef union CB_COLOR4_PITCH regCB_COLOR4_PITCH; -typedef union CB_COLOR4_SLICE regCB_COLOR4_SLICE; -typedef union CB_COLOR4_VIEW regCB_COLOR4_VIEW; -typedef union CB_COLOR5_ATTRIB regCB_COLOR5_ATTRIB; -typedef union CB_COLOR5_ATTRIB2 regCB_COLOR5_ATTRIB2; -typedef union CB_COLOR5_ATTRIB3 regCB_COLOR5_ATTRIB3; -typedef union CB_COLOR5_BASE regCB_COLOR5_BASE; -typedef union CB_COLOR5_BASE_EXT regCB_COLOR5_BASE_EXT; -typedef union CB_COLOR5_CLEAR_WORD0 regCB_COLOR5_CLEAR_WORD0; -typedef union CB_COLOR5_CLEAR_WORD1 regCB_COLOR5_CLEAR_WORD1; -typedef union CB_COLOR5_CMASK regCB_COLOR5_CMASK; -typedef union 
CB_COLOR5_CMASK_BASE_EXT regCB_COLOR5_CMASK_BASE_EXT; -typedef union CB_COLOR5_CMASK_SLICE regCB_COLOR5_CMASK_SLICE; -typedef union CB_COLOR5_DCC_BASE regCB_COLOR5_DCC_BASE; -typedef union CB_COLOR5_DCC_BASE_EXT regCB_COLOR5_DCC_BASE_EXT; -typedef union CB_COLOR5_DCC_CONTROL regCB_COLOR5_DCC_CONTROL; -typedef union CB_COLOR5_FMASK regCB_COLOR5_FMASK; -typedef union CB_COLOR5_FMASK_BASE_EXT regCB_COLOR5_FMASK_BASE_EXT; -typedef union CB_COLOR5_FMASK_SLICE regCB_COLOR5_FMASK_SLICE; -typedef union CB_COLOR5_INFO regCB_COLOR5_INFO; -typedef union CB_COLOR5_PITCH regCB_COLOR5_PITCH; -typedef union CB_COLOR5_SLICE regCB_COLOR5_SLICE; -typedef union CB_COLOR5_VIEW regCB_COLOR5_VIEW; -typedef union CB_COLOR6_ATTRIB regCB_COLOR6_ATTRIB; -typedef union CB_COLOR6_ATTRIB2 regCB_COLOR6_ATTRIB2; -typedef union CB_COLOR6_ATTRIB3 regCB_COLOR6_ATTRIB3; -typedef union CB_COLOR6_BASE regCB_COLOR6_BASE; -typedef union CB_COLOR6_BASE_EXT regCB_COLOR6_BASE_EXT; -typedef union CB_COLOR6_CLEAR_WORD0 regCB_COLOR6_CLEAR_WORD0; -typedef union CB_COLOR6_CLEAR_WORD1 regCB_COLOR6_CLEAR_WORD1; -typedef union CB_COLOR6_CMASK regCB_COLOR6_CMASK; -typedef union CB_COLOR6_CMASK_BASE_EXT regCB_COLOR6_CMASK_BASE_EXT; -typedef union CB_COLOR6_CMASK_SLICE regCB_COLOR6_CMASK_SLICE; -typedef union CB_COLOR6_DCC_BASE regCB_COLOR6_DCC_BASE; -typedef union CB_COLOR6_DCC_BASE_EXT regCB_COLOR6_DCC_BASE_EXT; -typedef union CB_COLOR6_DCC_CONTROL regCB_COLOR6_DCC_CONTROL; -typedef union CB_COLOR6_FMASK regCB_COLOR6_FMASK; -typedef union CB_COLOR6_FMASK_BASE_EXT regCB_COLOR6_FMASK_BASE_EXT; -typedef union CB_COLOR6_FMASK_SLICE regCB_COLOR6_FMASK_SLICE; -typedef union CB_COLOR6_INFO regCB_COLOR6_INFO; -typedef union CB_COLOR6_PITCH regCB_COLOR6_PITCH; -typedef union CB_COLOR6_SLICE regCB_COLOR6_SLICE; -typedef union CB_COLOR6_VIEW regCB_COLOR6_VIEW; -typedef union CB_COLOR7_ATTRIB regCB_COLOR7_ATTRIB; -typedef union CB_COLOR7_ATTRIB2 regCB_COLOR7_ATTRIB2; -typedef union CB_COLOR7_ATTRIB3 regCB_COLOR7_ATTRIB3; 
-typedef union CB_COLOR7_BASE regCB_COLOR7_BASE; -typedef union CB_COLOR7_BASE_EXT regCB_COLOR7_BASE_EXT; -typedef union CB_COLOR7_CLEAR_WORD0 regCB_COLOR7_CLEAR_WORD0; -typedef union CB_COLOR7_CLEAR_WORD1 regCB_COLOR7_CLEAR_WORD1; -typedef union CB_COLOR7_CMASK regCB_COLOR7_CMASK; -typedef union CB_COLOR7_CMASK_BASE_EXT regCB_COLOR7_CMASK_BASE_EXT; -typedef union CB_COLOR7_CMASK_SLICE regCB_COLOR7_CMASK_SLICE; -typedef union CB_COLOR7_DCC_BASE regCB_COLOR7_DCC_BASE; -typedef union CB_COLOR7_DCC_BASE_EXT regCB_COLOR7_DCC_BASE_EXT; -typedef union CB_COLOR7_DCC_CONTROL regCB_COLOR7_DCC_CONTROL; -typedef union CB_COLOR7_FMASK regCB_COLOR7_FMASK; -typedef union CB_COLOR7_FMASK_BASE_EXT regCB_COLOR7_FMASK_BASE_EXT; -typedef union CB_COLOR7_FMASK_SLICE regCB_COLOR7_FMASK_SLICE; -typedef union CB_COLOR7_INFO regCB_COLOR7_INFO; -typedef union CB_COLOR7_PITCH regCB_COLOR7_PITCH; -typedef union CB_COLOR7_SLICE regCB_COLOR7_SLICE; -typedef union CB_COLOR7_VIEW regCB_COLOR7_VIEW; -typedef union CB_COLOR_CONTROL regCB_COLOR_CONTROL; -typedef union CB_COVERAGE_OUT_CONTROL regCB_COVERAGE_OUT_CONTROL; -typedef union CB_DCC_CONFIG regCB_DCC_CONFIG; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union CB_DCC_CONFIG2 regCB_DCC_CONFIG2; -#endif -typedef union CB_DCC_CONTROL regCB_DCC_CONTROL; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union CB_FDCC_CONTROL regCB_FDCC_CONTROL; -typedef union CB_FGCG_SRAM_OVERRIDE regCB_FGCG_SRAM_OVERRIDE; -#endif -typedef union CB_HW_CONTROL regCB_HW_CONTROL; -typedef union CB_HW_CONTROL_1 regCB_HW_CONTROL_1; -typedef union CB_HW_CONTROL_2 regCB_HW_CONTROL_2; -typedef union CB_HW_CONTROL_3 regCB_HW_CONTROL_3; -typedef union CB_HW_CONTROL_4 regCB_HW_CONTROL_4; -typedef union CB_HW_MEM_ARBITER_RD regCB_HW_MEM_ARBITER_RD; -typedef union CB_HW_MEM_ARBITER_WR regCB_HW_MEM_ARBITER_WR; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union 
CB_KEY_OVERRIDE_0 regCB_KEY_OVERRIDE_0; -typedef union CB_KEY_OVERRIDE_1 regCB_KEY_OVERRIDE_1; -typedef union CB_KEY_OVERRIDE_2 regCB_KEY_OVERRIDE_2; -typedef union CB_KEY_OVERRIDE_3 regCB_KEY_OVERRIDE_3; -typedef union CB_KEY_OVERRIDE_4 regCB_KEY_OVERRIDE_4; -typedef union CB_KEY_OVERRIDE_5 regCB_KEY_OVERRIDE_5; -typedef union CB_KEY_OVERRIDE_6 regCB_KEY_OVERRIDE_6; -typedef union CB_KEY_OVERRIDE_7 regCB_KEY_OVERRIDE_7; -#endif -typedef union CB_MRT0_EPITCH regCB_MRT0_EPITCH; -typedef union CB_MRT1_EPITCH regCB_MRT1_EPITCH; -typedef union CB_MRT2_EPITCH regCB_MRT2_EPITCH; -typedef union CB_MRT3_EPITCH regCB_MRT3_EPITCH; -typedef union CB_MRT4_EPITCH regCB_MRT4_EPITCH; -typedef union CB_MRT5_EPITCH regCB_MRT5_EPITCH; -typedef union CB_MRT6_EPITCH regCB_MRT6_EPITCH; -typedef union CB_MRT7_EPITCH regCB_MRT7_EPITCH; -typedef union CB_PERFCOUNTER0_HI regCB_PERFCOUNTER0_HI; -typedef union CB_PERFCOUNTER0_LO regCB_PERFCOUNTER0_LO; -typedef union CB_PERFCOUNTER0_SELECT regCB_PERFCOUNTER0_SELECT; -typedef union CB_PERFCOUNTER0_SELECT1 regCB_PERFCOUNTER0_SELECT1; -typedef union CB_PERFCOUNTER1_HI regCB_PERFCOUNTER1_HI; -typedef union CB_PERFCOUNTER1_LO regCB_PERFCOUNTER1_LO; -typedef union CB_PERFCOUNTER1_SELECT regCB_PERFCOUNTER1_SELECT; -typedef union CB_PERFCOUNTER2_HI regCB_PERFCOUNTER2_HI; -typedef union CB_PERFCOUNTER2_LO regCB_PERFCOUNTER2_LO; -typedef union CB_PERFCOUNTER2_SELECT regCB_PERFCOUNTER2_SELECT; -typedef union CB_PERFCOUNTER3_HI regCB_PERFCOUNTER3_HI; -typedef union CB_PERFCOUNTER3_LO regCB_PERFCOUNTER3_LO; -typedef union CB_PERFCOUNTER3_SELECT regCB_PERFCOUNTER3_SELECT; -typedef union CB_PERFCOUNTER_FILTER regCB_PERFCOUNTER_FILTER; -typedef union CB_RMI_BC_GL2_CACHE_CONTROL regCB_RMI_BC_GL2_CACHE_CONTROL; -typedef union CB_RMI_GL2_CACHE_CONTROL regCB_RMI_GL2_CACHE_CONTROL; -typedef union CB_SHADER_MASK regCB_SHADER_MASK; -typedef union CB_STUTTER_CONTROL_CMASK_RDLAT regCB_STUTTER_CONTROL_CMASK_RDLAT; -typedef union CB_STUTTER_CONTROL_COLOR_RDLAT 
regCB_STUTTER_CONTROL_COLOR_RDLAT; -typedef union CB_STUTTER_CONTROL_FMASK_RDLAT regCB_STUTTER_CONTROL_FMASK_RDLAT; -typedef union CB_TARGET_MASK regCB_TARGET_MASK; -typedef union CHA_PERFCOUNTER0_HI regCHA_PERFCOUNTER0_HI; -typedef union CHA_PERFCOUNTER0_LO regCHA_PERFCOUNTER0_LO; -typedef union CHA_PERFCOUNTER0_SELECT regCHA_PERFCOUNTER0_SELECT; -typedef union CHA_PERFCOUNTER0_SELECT1 regCHA_PERFCOUNTER0_SELECT1; -typedef union CHA_PERFCOUNTER1_HI regCHA_PERFCOUNTER1_HI; -typedef union CHA_PERFCOUNTER1_LO regCHA_PERFCOUNTER1_LO; -typedef union CHA_PERFCOUNTER1_SELECT regCHA_PERFCOUNTER1_SELECT; -typedef union CHA_PERFCOUNTER2_HI regCHA_PERFCOUNTER2_HI; -typedef union CHA_PERFCOUNTER2_LO regCHA_PERFCOUNTER2_LO; -typedef union CHA_PERFCOUNTER2_SELECT regCHA_PERFCOUNTER2_SELECT; -typedef union CHA_PERFCOUNTER3_HI regCHA_PERFCOUNTER3_HI; -typedef union CHA_PERFCOUNTER3_LO regCHA_PERFCOUNTER3_LO; -typedef union CHA_PERFCOUNTER3_SELECT regCHA_PERFCOUNTER3_SELECT; -typedef union CHCG_PERFCOUNTER0_HI regCHCG_PERFCOUNTER0_HI; -typedef union CHCG_PERFCOUNTER0_LO regCHCG_PERFCOUNTER0_LO; -typedef union CHCG_PERFCOUNTER0_SELECT regCHCG_PERFCOUNTER0_SELECT; -typedef union CHCG_PERFCOUNTER0_SELECT1 regCHCG_PERFCOUNTER0_SELECT1; -typedef union CHCG_PERFCOUNTER1_HI regCHCG_PERFCOUNTER1_HI; -typedef union CHCG_PERFCOUNTER1_LO regCHCG_PERFCOUNTER1_LO; -typedef union CHCG_PERFCOUNTER1_SELECT regCHCG_PERFCOUNTER1_SELECT; -typedef union CHCG_PERFCOUNTER2_HI regCHCG_PERFCOUNTER2_HI; -typedef union CHCG_PERFCOUNTER2_LO regCHCG_PERFCOUNTER2_LO; -typedef union CHCG_PERFCOUNTER2_SELECT regCHCG_PERFCOUNTER2_SELECT; -typedef union CHCG_PERFCOUNTER3_HI regCHCG_PERFCOUNTER3_HI; -typedef union CHCG_PERFCOUNTER3_LO regCHCG_PERFCOUNTER3_LO; -typedef union CHCG_PERFCOUNTER3_SELECT regCHCG_PERFCOUNTER3_SELECT; -typedef union CHC_PERFCOUNTER0_HI regCHC_PERFCOUNTER0_HI; -typedef union CHC_PERFCOUNTER0_LO regCHC_PERFCOUNTER0_LO; -typedef union CHC_PERFCOUNTER0_SELECT regCHC_PERFCOUNTER0_SELECT; 
-typedef union CHC_PERFCOUNTER0_SELECT1 regCHC_PERFCOUNTER0_SELECT1; -typedef union CHC_PERFCOUNTER1_HI regCHC_PERFCOUNTER1_HI; -typedef union CHC_PERFCOUNTER1_LO regCHC_PERFCOUNTER1_LO; -typedef union CHC_PERFCOUNTER1_SELECT regCHC_PERFCOUNTER1_SELECT; -typedef union CHC_PERFCOUNTER2_HI regCHC_PERFCOUNTER2_HI; -typedef union CHC_PERFCOUNTER2_LO regCHC_PERFCOUNTER2_LO; -typedef union CHC_PERFCOUNTER2_SELECT regCHC_PERFCOUNTER2_SELECT; -typedef union CHC_PERFCOUNTER3_HI regCHC_PERFCOUNTER3_HI; -typedef union CHC_PERFCOUNTER3_LO regCHC_PERFCOUNTER3_LO; -typedef union CHC_PERFCOUNTER3_SELECT regCHC_PERFCOUNTER3_SELECT; -typedef union COHER_DEST_BASE_0 regCOHER_DEST_BASE_0; -typedef union COHER_DEST_BASE_1 regCOHER_DEST_BASE_1; -typedef union COHER_DEST_BASE_2 regCOHER_DEST_BASE_2; -typedef union COHER_DEST_BASE_3 regCOHER_DEST_BASE_3; -typedef union COHER_DEST_BASE_HI_0 regCOHER_DEST_BASE_HI_0; -typedef union COHER_DEST_BASE_HI_1 regCOHER_DEST_BASE_HI_1; -typedef union COHER_DEST_BASE_HI_2 regCOHER_DEST_BASE_HI_2; -typedef union COHER_DEST_BASE_HI_3 regCOHER_DEST_BASE_HI_3; -typedef union COMPUTE_DDID_INDEX regCOMPUTE_DDID_INDEX; -typedef union COMPUTE_DESTINATION_EN_SE0 regCOMPUTE_DESTINATION_EN_SE0; -typedef union COMPUTE_DESTINATION_EN_SE1 regCOMPUTE_DESTINATION_EN_SE1; -typedef union COMPUTE_DESTINATION_EN_SE2 regCOMPUTE_DESTINATION_EN_SE2; -typedef union COMPUTE_DESTINATION_EN_SE3 regCOMPUTE_DESTINATION_EN_SE3; -typedef union COMPUTE_DIM_X regCOMPUTE_DIM_X; -typedef union COMPUTE_DIM_Y regCOMPUTE_DIM_Y; -typedef union COMPUTE_DIM_Z regCOMPUTE_DIM_Z; -typedef union COMPUTE_DISPATCH_END regCOMPUTE_DISPATCH_END; -typedef union COMPUTE_DISPATCH_ID regCOMPUTE_DISPATCH_ID; -typedef union COMPUTE_DISPATCH_INITIATOR regCOMPUTE_DISPATCH_INITIATOR; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union COMPUTE_DISPATCH_INTERLEAVE regCOMPUTE_DISPATCH_INTERLEAVE; -#endif -typedef union COMPUTE_DISPATCH_PKT_ADDR_HI 
regCOMPUTE_DISPATCH_PKT_ADDR_HI; -typedef union COMPUTE_DISPATCH_PKT_ADDR_LO regCOMPUTE_DISPATCH_PKT_ADDR_LO; -typedef union COMPUTE_DISPATCH_SCRATCH_BASE_HI regCOMPUTE_DISPATCH_SCRATCH_BASE_HI; -typedef union COMPUTE_DISPATCH_SCRATCH_BASE_LO regCOMPUTE_DISPATCH_SCRATCH_BASE_LO; -typedef union COMPUTE_DISPATCH_TUNNEL regCOMPUTE_DISPATCH_TUNNEL; -typedef union COMPUTE_MISC_RESERVED regCOMPUTE_MISC_RESERVED; -typedef union COMPUTE_NOWHERE regCOMPUTE_NOWHERE; -typedef union COMPUTE_NUM_THREAD_X regCOMPUTE_NUM_THREAD_X; -typedef union COMPUTE_NUM_THREAD_Y regCOMPUTE_NUM_THREAD_Y; -typedef union COMPUTE_NUM_THREAD_Z regCOMPUTE_NUM_THREAD_Z; -typedef union COMPUTE_PERFCOUNT_ENABLE regCOMPUTE_PERFCOUNT_ENABLE; -typedef union COMPUTE_PGM_HI regCOMPUTE_PGM_HI; -typedef union COMPUTE_PGM_LO regCOMPUTE_PGM_LO; -typedef union COMPUTE_PGM_RSRC1 regCOMPUTE_PGM_RSRC1; -typedef union COMPUTE_PGM_RSRC2 regCOMPUTE_PGM_RSRC2; -typedef union COMPUTE_PGM_RSRC3 regCOMPUTE_PGM_RSRC3; -typedef union COMPUTE_PIPELINESTAT_ENABLE regCOMPUTE_PIPELINESTAT_ENABLE; -typedef union COMPUTE_PREF_PRI_ACCUM_0 regCOMPUTE_PREF_PRI_ACCUM_0; -typedef union COMPUTE_PREF_PRI_ACCUM_1 regCOMPUTE_PREF_PRI_ACCUM_1; -typedef union COMPUTE_PREF_PRI_ACCUM_2 regCOMPUTE_PREF_PRI_ACCUM_2; -typedef union COMPUTE_PREF_PRI_ACCUM_3 regCOMPUTE_PREF_PRI_ACCUM_3; -typedef union COMPUTE_RELAUNCH regCOMPUTE_RELAUNCH; -typedef union COMPUTE_RELAUNCH2 regCOMPUTE_RELAUNCH2; -typedef union COMPUTE_REQ_CTRL regCOMPUTE_REQ_CTRL; -typedef union COMPUTE_RESOURCE_LIMITS regCOMPUTE_RESOURCE_LIMITS; -typedef union COMPUTE_RESTART_X regCOMPUTE_RESTART_X; -typedef union COMPUTE_RESTART_Y regCOMPUTE_RESTART_Y; -typedef union COMPUTE_RESTART_Z regCOMPUTE_RESTART_Z; -typedef union COMPUTE_SHADER_CHKSUM regCOMPUTE_SHADER_CHKSUM; -typedef union COMPUTE_START_X regCOMPUTE_START_X; -typedef union COMPUTE_START_Y regCOMPUTE_START_Y; -typedef union COMPUTE_START_Z regCOMPUTE_START_Z; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE0 
regCOMPUTE_STATIC_THREAD_MGMT_SE0; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE1 regCOMPUTE_STATIC_THREAD_MGMT_SE1; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE2 regCOMPUTE_STATIC_THREAD_MGMT_SE2; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE3 regCOMPUTE_STATIC_THREAD_MGMT_SE3; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union COMPUTE_STATIC_THREAD_MGMT_SE4 regCOMPUTE_STATIC_THREAD_MGMT_SE4; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE5 regCOMPUTE_STATIC_THREAD_MGMT_SE5; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE6 regCOMPUTE_STATIC_THREAD_MGMT_SE6; -typedef union COMPUTE_STATIC_THREAD_MGMT_SE7 regCOMPUTE_STATIC_THREAD_MGMT_SE7; -#endif -typedef union COMPUTE_THREADGROUP_ID regCOMPUTE_THREADGROUP_ID; -typedef union COMPUTE_THREAD_TRACE_ENABLE regCOMPUTE_THREAD_TRACE_ENABLE; -typedef union COMPUTE_TMPRING_SIZE regCOMPUTE_TMPRING_SIZE; -typedef union COMPUTE_USER_ACCUM_0 regCOMPUTE_USER_ACCUM_0; -typedef union COMPUTE_USER_ACCUM_1 regCOMPUTE_USER_ACCUM_1; -typedef union COMPUTE_USER_ACCUM_2 regCOMPUTE_USER_ACCUM_2; -typedef union COMPUTE_USER_ACCUM_3 regCOMPUTE_USER_ACCUM_3; -typedef union COMPUTE_USER_DATA_0 regCOMPUTE_USER_DATA_0; -typedef union COMPUTE_USER_DATA_1 regCOMPUTE_USER_DATA_1; -typedef union COMPUTE_USER_DATA_2 regCOMPUTE_USER_DATA_2; -typedef union COMPUTE_USER_DATA_3 regCOMPUTE_USER_DATA_3; -typedef union COMPUTE_USER_DATA_4 regCOMPUTE_USER_DATA_4; -typedef union COMPUTE_USER_DATA_5 regCOMPUTE_USER_DATA_5; -typedef union COMPUTE_USER_DATA_6 regCOMPUTE_USER_DATA_6; -typedef union COMPUTE_USER_DATA_7 regCOMPUTE_USER_DATA_7; -typedef union COMPUTE_USER_DATA_8 regCOMPUTE_USER_DATA_8; -typedef union COMPUTE_USER_DATA_9 regCOMPUTE_USER_DATA_9; -typedef union COMPUTE_USER_DATA_10 regCOMPUTE_USER_DATA_10; -typedef union COMPUTE_USER_DATA_11 regCOMPUTE_USER_DATA_11; -typedef union COMPUTE_USER_DATA_12 regCOMPUTE_USER_DATA_12; -typedef union COMPUTE_USER_DATA_13 regCOMPUTE_USER_DATA_13; -typedef union 
COMPUTE_USER_DATA_14 regCOMPUTE_USER_DATA_14; -typedef union COMPUTE_USER_DATA_15 regCOMPUTE_USER_DATA_15; -typedef union COMPUTE_VMID regCOMPUTE_VMID; -typedef union COMPUTE_WAVE_RESTORE_ADDR_HI regCOMPUTE_WAVE_RESTORE_ADDR_HI; -typedef union COMPUTE_WAVE_RESTORE_ADDR_LO regCOMPUTE_WAVE_RESTORE_ADDR_LO; -typedef union CPC_PERFCOUNTER0_HI regCPC_PERFCOUNTER0_HI; -typedef union CPC_PERFCOUNTER0_LO regCPC_PERFCOUNTER0_LO; -typedef union CPC_PERFCOUNTER0_SELECT regCPC_PERFCOUNTER0_SELECT; -typedef union CPC_PERFCOUNTER0_SELECT1 regCPC_PERFCOUNTER0_SELECT1; -typedef union CPC_PERFCOUNTER1_HI regCPC_PERFCOUNTER1_HI; -typedef union CPC_PERFCOUNTER1_LO regCPC_PERFCOUNTER1_LO; -typedef union CPC_PERFCOUNTER1_SELECT regCPC_PERFCOUNTER1_SELECT; -typedef union CPF_PERFCOUNTER0_HI regCPF_PERFCOUNTER0_HI; -typedef union CPF_PERFCOUNTER0_LO regCPF_PERFCOUNTER0_LO; -typedef union CPF_PERFCOUNTER0_SELECT regCPF_PERFCOUNTER0_SELECT; -typedef union CPF_PERFCOUNTER0_SELECT1 regCPF_PERFCOUNTER0_SELECT1; -typedef union CPF_PERFCOUNTER1_HI regCPF_PERFCOUNTER1_HI; -typedef union CPF_PERFCOUNTER1_LO regCPF_PERFCOUNTER1_LO; -typedef union CPF_PERFCOUNTER1_SELECT regCPF_PERFCOUNTER1_SELECT; -typedef union CPG_PERFCOUNTER0_HI regCPG_PERFCOUNTER0_HI; -typedef union CPG_PERFCOUNTER0_LO regCPG_PERFCOUNTER0_LO; -typedef union CPG_PERFCOUNTER0_SELECT regCPG_PERFCOUNTER0_SELECT; -typedef union CPG_PERFCOUNTER0_SELECT1 regCPG_PERFCOUNTER0_SELECT1; -typedef union CPG_PERFCOUNTER1_HI regCPG_PERFCOUNTER1_HI; -typedef union CPG_PERFCOUNTER1_LO regCPG_PERFCOUNTER1_LO; -typedef union CPG_PERFCOUNTER1_SELECT regCPG_PERFCOUNTER1_SELECT; -typedef union CP_COHER_BASE regCP_COHER_BASE; -typedef union CP_COHER_BASE_HI regCP_COHER_BASE_HI; -typedef union CP_COHER_CNTL regCP_COHER_CNTL; -typedef union CP_COHER_SIZE regCP_COHER_SIZE; -typedef union CP_COHER_SIZE_HI regCP_COHER_SIZE_HI; -typedef union CP_COHER_START_DELAY regCP_COHER_START_DELAY; -typedef union CP_COHER_STATUS regCP_COHER_STATUS; -typedef union 
CP_ME_COHER_BASE regCP_ME_COHER_BASE; -typedef union CP_ME_COHER_BASE_HI regCP_ME_COHER_BASE_HI; -typedef union CP_ME_COHER_CNTL regCP_ME_COHER_CNTL; -typedef union CP_ME_COHER_SIZE regCP_ME_COHER_SIZE; -typedef union CP_ME_COHER_SIZE_HI regCP_ME_COHER_SIZE_HI; -typedef union CP_ME_COHER_STATUS regCP_ME_COHER_STATUS; -typedef union CP_NUM_PRIM_NEEDED_COUNT0_HI regCP_NUM_PRIM_NEEDED_COUNT0_HI; -typedef union CP_NUM_PRIM_NEEDED_COUNT0_LO regCP_NUM_PRIM_NEEDED_COUNT0_LO; -typedef union CP_NUM_PRIM_NEEDED_COUNT1_HI regCP_NUM_PRIM_NEEDED_COUNT1_HI; -typedef union CP_NUM_PRIM_NEEDED_COUNT1_LO regCP_NUM_PRIM_NEEDED_COUNT1_LO; -typedef union CP_NUM_PRIM_NEEDED_COUNT2_HI regCP_NUM_PRIM_NEEDED_COUNT2_HI; -typedef union CP_NUM_PRIM_NEEDED_COUNT2_LO regCP_NUM_PRIM_NEEDED_COUNT2_LO; -typedef union CP_NUM_PRIM_NEEDED_COUNT3_HI regCP_NUM_PRIM_NEEDED_COUNT3_HI; -typedef union CP_NUM_PRIM_NEEDED_COUNT3_LO regCP_NUM_PRIM_NEEDED_COUNT3_LO; -typedef union CP_NUM_PRIM_WRITTEN_COUNT0_HI regCP_NUM_PRIM_WRITTEN_COUNT0_HI; -typedef union CP_NUM_PRIM_WRITTEN_COUNT0_LO regCP_NUM_PRIM_WRITTEN_COUNT0_LO; -typedef union CP_NUM_PRIM_WRITTEN_COUNT1_HI regCP_NUM_PRIM_WRITTEN_COUNT1_HI; -typedef union CP_NUM_PRIM_WRITTEN_COUNT1_LO regCP_NUM_PRIM_WRITTEN_COUNT1_LO; -typedef union CP_NUM_PRIM_WRITTEN_COUNT2_HI regCP_NUM_PRIM_WRITTEN_COUNT2_HI; -typedef union CP_NUM_PRIM_WRITTEN_COUNT2_LO regCP_NUM_PRIM_WRITTEN_COUNT2_LO; -typedef union CP_NUM_PRIM_WRITTEN_COUNT3_HI regCP_NUM_PRIM_WRITTEN_COUNT3_HI; -typedef union CP_NUM_PRIM_WRITTEN_COUNT3_LO regCP_NUM_PRIM_WRITTEN_COUNT3_LO; -typedef union CP_PERFMON_CNTL regCP_PERFMON_CNTL; -typedef union CP_PERFMON_CNTX_CNTL regCP_PERFMON_CNTX_CNTL; -typedef union CP_SC_PSINVOC_COUNT0_HI regCP_SC_PSINVOC_COUNT0_HI; -typedef union CP_SC_PSINVOC_COUNT0_LO regCP_SC_PSINVOC_COUNT0_LO; -typedef union CP_SC_PSINVOC_COUNT1_HI regCP_SC_PSINVOC_COUNT1_HI; -typedef union CP_SC_PSINVOC_COUNT1_LO regCP_SC_PSINVOC_COUNT1_LO; -typedef union CP_STRMOUT_CNTL regCP_STRMOUT_CNTL; 
-#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union CP_VGT_ASINVOC_COUNT_HI regCP_VGT_ASINVOC_COUNT_HI; -typedef union CP_VGT_ASINVOC_COUNT_LO regCP_VGT_ASINVOC_COUNT_LO; -#endif -typedef union CP_VGT_CSINVOC_COUNT_HI regCP_VGT_CSINVOC_COUNT_HI; -typedef union CP_VGT_CSINVOC_COUNT_LO regCP_VGT_CSINVOC_COUNT_LO; -typedef union CP_VGT_DSINVOC_COUNT_HI regCP_VGT_DSINVOC_COUNT_HI; -typedef union CP_VGT_DSINVOC_COUNT_LO regCP_VGT_DSINVOC_COUNT_LO; -typedef union CP_VGT_GSINVOC_COUNT_HI regCP_VGT_GSINVOC_COUNT_HI; -typedef union CP_VGT_GSINVOC_COUNT_LO regCP_VGT_GSINVOC_COUNT_LO; -typedef union CP_VGT_GSPRIM_COUNT_HI regCP_VGT_GSPRIM_COUNT_HI; -typedef union CP_VGT_GSPRIM_COUNT_LO regCP_VGT_GSPRIM_COUNT_LO; -typedef union CP_VGT_HSINVOC_COUNT_HI regCP_VGT_HSINVOC_COUNT_HI; -typedef union CP_VGT_HSINVOC_COUNT_LO regCP_VGT_HSINVOC_COUNT_LO; -typedef union CP_VGT_IAPRIM_COUNT_HI regCP_VGT_IAPRIM_COUNT_HI; -typedef union CP_VGT_IAPRIM_COUNT_LO regCP_VGT_IAPRIM_COUNT_LO; -typedef union CP_VGT_IAVERT_COUNT_HI regCP_VGT_IAVERT_COUNT_HI; -typedef union CP_VGT_IAVERT_COUNT_LO regCP_VGT_IAVERT_COUNT_LO; -typedef union CP_VGT_VSINVOC_COUNT_HI regCP_VGT_VSINVOC_COUNT_HI; -typedef union CP_VGT_VSINVOC_COUNT_LO regCP_VGT_VSINVOC_COUNT_LO; -typedef union DB_ALPHA_TO_MASK regDB_ALPHA_TO_MASK; -typedef union DB_CGTT_CLK_CTRL_0 regDB_CGTT_CLK_CTRL_0; -typedef union DB_COUNT_CONTROL regDB_COUNT_CONTROL; -typedef union DB_CREDIT_LIMIT regDB_CREDIT_LIMIT; -typedef union DB_DEPTH_BOUNDS_MAX regDB_DEPTH_BOUNDS_MAX; -typedef union DB_DEPTH_BOUNDS_MIN regDB_DEPTH_BOUNDS_MIN; -typedef union DB_DEPTH_CLEAR regDB_DEPTH_CLEAR; -typedef union DB_DEPTH_CONTROL regDB_DEPTH_CONTROL; -typedef union DB_DEPTH_SIZE regDB_DEPTH_SIZE; -typedef union DB_DEPTH_SIZE_XY regDB_DEPTH_SIZE_XY; -typedef union DB_DEPTH_VIEW regDB_DEPTH_VIEW; -typedef union DB_DFSM_CONFIG regDB_DFSM_CONFIG; -typedef union DB_DFSM_CONTROL regDB_DFSM_CONTROL; -typedef union DB_DFSM_FLUSH_AUX_EVENT 
regDB_DFSM_FLUSH_AUX_EVENT; -typedef union DB_DFSM_FLUSH_ENABLE regDB_DFSM_FLUSH_ENABLE; -typedef union DB_DFSM_PRIMS_IN_FLIGHT regDB_DFSM_PRIMS_IN_FLIGHT; -typedef union DB_DFSM_TILES_IN_FLIGHT regDB_DFSM_TILES_IN_FLIGHT; -typedef union DB_DFSM_WATCHDOG regDB_DFSM_WATCHDOG; -typedef union DB_DFSM_WATERMARK regDB_DFSM_WATERMARK; -typedef union DB_EQAA regDB_EQAA; -typedef union DB_EQUAD_STUTTER_CONTROL regDB_EQUAD_STUTTER_CONTROL; -typedef union DB_ETILE_STUTTER_CONTROL regDB_ETILE_STUTTER_CONTROL; -typedef union DB_EXCEPTION_CONTROL regDB_EXCEPTION_CONTROL; -typedef union DB_FGCG_INTERFACES_CLK_CTRL regDB_FGCG_INTERFACES_CLK_CTRL; -typedef union DB_FGCG_SRAMS_CLK_CTRL regDB_FGCG_SRAMS_CLK_CTRL; -typedef union DB_FIFO_DEPTH1 regDB_FIFO_DEPTH1; -typedef union DB_FIFO_DEPTH2 regDB_FIFO_DEPTH2; -typedef union DB_FIFO_DEPTH3 regDB_FIFO_DEPTH3; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union DB_FIFO_DEPTH4 regDB_FIFO_DEPTH4; -#endif -typedef union DB_FREE_CACHELINES regDB_FREE_CACHELINES; -typedef union DB_HTILE_DATA_BASE regDB_HTILE_DATA_BASE; -typedef union DB_HTILE_DATA_BASE_HI regDB_HTILE_DATA_BASE_HI; -typedef union DB_HTILE_SURFACE regDB_HTILE_SURFACE; -typedef union DB_LAST_OF_BURST_CONFIG regDB_LAST_OF_BURST_CONFIG; -typedef union DB_LQUAD_STUTTER_CONTROL regDB_LQUAD_STUTTER_CONTROL; -typedef union DB_LTILE_STUTTER_CONTROL regDB_LTILE_STUTTER_CONTROL; -typedef union DB_MEM_ARB_WATERMARKS regDB_MEM_ARB_WATERMARKS; -typedef union DB_OCCLUSION_COUNT0_HI regDB_OCCLUSION_COUNT0_HI; -typedef union DB_OCCLUSION_COUNT0_LOW regDB_OCCLUSION_COUNT0_LOW; -typedef union DB_OCCLUSION_COUNT1_HI regDB_OCCLUSION_COUNT1_HI; -typedef union DB_OCCLUSION_COUNT1_LOW regDB_OCCLUSION_COUNT1_LOW; -typedef union DB_OCCLUSION_COUNT2_HI regDB_OCCLUSION_COUNT2_HI; -typedef union DB_OCCLUSION_COUNT2_LOW regDB_OCCLUSION_COUNT2_LOW; -typedef union DB_OCCLUSION_COUNT3_HI regDB_OCCLUSION_COUNT3_HI; -typedef union DB_OCCLUSION_COUNT3_LOW 
regDB_OCCLUSION_COUNT3_LOW; -typedef union DB_PERFCOUNTER0_HI regDB_PERFCOUNTER0_HI; -typedef union DB_PERFCOUNTER0_LO regDB_PERFCOUNTER0_LO; -typedef union DB_PERFCOUNTER0_SELECT regDB_PERFCOUNTER0_SELECT; -typedef union DB_PERFCOUNTER0_SELECT1 regDB_PERFCOUNTER0_SELECT1; -typedef union DB_PERFCOUNTER1_HI regDB_PERFCOUNTER1_HI; -typedef union DB_PERFCOUNTER1_LO regDB_PERFCOUNTER1_LO; -typedef union DB_PERFCOUNTER1_SELECT regDB_PERFCOUNTER1_SELECT; -typedef union DB_PERFCOUNTER1_SELECT1 regDB_PERFCOUNTER1_SELECT1; -typedef union DB_PERFCOUNTER2_HI regDB_PERFCOUNTER2_HI; -typedef union DB_PERFCOUNTER2_LO regDB_PERFCOUNTER2_LO; -typedef union DB_PERFCOUNTER2_SELECT regDB_PERFCOUNTER2_SELECT; -typedef union DB_PERFCOUNTER3_HI regDB_PERFCOUNTER3_HI; -typedef union DB_PERFCOUNTER3_LO regDB_PERFCOUNTER3_LO; -typedef union DB_PERFCOUNTER3_SELECT regDB_PERFCOUNTER3_SELECT; -typedef union DB_PRELOAD_CONTROL regDB_PRELOAD_CONTROL; -typedef union DB_RENDER_CONTROL regDB_RENDER_CONTROL; -typedef union DB_RENDER_OVERRIDE regDB_RENDER_OVERRIDE; -typedef union DB_RENDER_OVERRIDE2 regDB_RENDER_OVERRIDE2; -typedef union DB_RESERVED_REG_1 regDB_RESERVED_REG_1; -typedef union DB_RESERVED_REG_2 regDB_RESERVED_REG_2; -typedef union DB_RESERVED_REG_3 regDB_RESERVED_REG_3; -typedef union DB_RING_CONTROL regDB_RING_CONTROL; -typedef union DB_RMI_BC_GL2_CACHE_CONTROL regDB_RMI_BC_GL2_CACHE_CONTROL; -typedef union DB_RMI_CACHE_POLICY regDB_RMI_CACHE_POLICY; -typedef union DB_RMI_L2_CACHE_CONTROL regDB_RMI_L2_CACHE_CONTROL; -typedef union DB_SHADER_CONTROL regDB_SHADER_CONTROL; -typedef union DB_SPI_VRS_CENTER_LOCATION regDB_SPI_VRS_CENTER_LOCATION; -typedef union DB_SRESULTS_COMPARE_STATE0 regDB_SRESULTS_COMPARE_STATE0; -typedef union DB_SRESULTS_COMPARE_STATE1 regDB_SRESULTS_COMPARE_STATE1; -typedef union DB_STENCILREFMASK regDB_STENCILREFMASK; -typedef union DB_STENCILREFMASK_BF regDB_STENCILREFMASK_BF; -typedef union DB_STENCIL_CLEAR regDB_STENCIL_CLEAR; -typedef union DB_STENCIL_CONTROL 
regDB_STENCIL_CONTROL; -typedef union DB_STENCIL_INFO regDB_STENCIL_INFO; -typedef union DB_STENCIL_INFO2 regDB_STENCIL_INFO2; -typedef union DB_STENCIL_READ_BASE regDB_STENCIL_READ_BASE; -typedef union DB_STENCIL_READ_BASE_HI regDB_STENCIL_READ_BASE_HI; -typedef union DB_STENCIL_WRITE_BASE regDB_STENCIL_WRITE_BASE; -typedef union DB_STENCIL_WRITE_BASE_HI regDB_STENCIL_WRITE_BASE_HI; -typedef union DB_SUBTILE_CONTROL regDB_SUBTILE_CONTROL; -typedef union DB_VRS_OVERRIDE_CNTL regDB_VRS_OVERRIDE_CNTL; -typedef union DB_WATERMARKS regDB_WATERMARKS; -typedef union DB_ZPASS_COUNT_HI regDB_ZPASS_COUNT_HI; -typedef union DB_ZPASS_COUNT_LOW regDB_ZPASS_COUNT_LOW; -typedef union DB_Z_INFO regDB_Z_INFO; -typedef union DB_Z_INFO2 regDB_Z_INFO2; -typedef union DB_Z_READ_BASE regDB_Z_READ_BASE; -typedef union DB_Z_READ_BASE_HI regDB_Z_READ_BASE_HI; -typedef union DB_Z_WRITE_BASE regDB_Z_WRITE_BASE; -typedef union DB_Z_WRITE_BASE_HI regDB_Z_WRITE_BASE_HI; -typedef union DF_PIE_AON_PerfMonCtlHi0 regDF_PIE_AON_PerfMonCtlHi0; -typedef union DF_PIE_AON_PerfMonCtlHi1 regDF_PIE_AON_PerfMonCtlHi1; -typedef union DF_PIE_AON_PerfMonCtlHi2 regDF_PIE_AON_PerfMonCtlHi2; -typedef union DF_PIE_AON_PerfMonCtlHi3 regDF_PIE_AON_PerfMonCtlHi3; -typedef union DF_PIE_AON_PerfMonCtlHi4 regDF_PIE_AON_PerfMonCtlHi4; -typedef union DF_PIE_AON_PerfMonCtlHi5 regDF_PIE_AON_PerfMonCtlHi5; -typedef union DF_PIE_AON_PerfMonCtlHi6 regDF_PIE_AON_PerfMonCtlHi6; -typedef union DF_PIE_AON_PerfMonCtlHi7 regDF_PIE_AON_PerfMonCtlHi7; -typedef union DF_PIE_AON_PerfMonCtlHi8 regDF_PIE_AON_PerfMonCtlHi8; -typedef union DF_PIE_AON_PerfMonCtlHi9 regDF_PIE_AON_PerfMonCtlHi9; -typedef union DF_PIE_AON_PerfMonCtlHi10 regDF_PIE_AON_PerfMonCtlHi10; -typedef union DF_PIE_AON_PerfMonCtlHi11 regDF_PIE_AON_PerfMonCtlHi11; -typedef union DF_PIE_AON_PerfMonCtlHi12 regDF_PIE_AON_PerfMonCtlHi12; -typedef union DF_PIE_AON_PerfMonCtlHi13 regDF_PIE_AON_PerfMonCtlHi13; -typedef union DF_PIE_AON_PerfMonCtlHi14 
regDF_PIE_AON_PerfMonCtlHi14; -typedef union DF_PIE_AON_PerfMonCtlHi15 regDF_PIE_AON_PerfMonCtlHi15; -typedef union DF_PIE_AON_PerfMonCtlLo0 regDF_PIE_AON_PerfMonCtlLo0; -typedef union DF_PIE_AON_PerfMonCtlLo1 regDF_PIE_AON_PerfMonCtlLo1; -typedef union DF_PIE_AON_PerfMonCtlLo2 regDF_PIE_AON_PerfMonCtlLo2; -typedef union DF_PIE_AON_PerfMonCtlLo3 regDF_PIE_AON_PerfMonCtlLo3; -typedef union DF_PIE_AON_PerfMonCtlLo4 regDF_PIE_AON_PerfMonCtlLo4; -typedef union DF_PIE_AON_PerfMonCtlLo5 regDF_PIE_AON_PerfMonCtlLo5; -typedef union DF_PIE_AON_PerfMonCtlLo6 regDF_PIE_AON_PerfMonCtlLo6; -typedef union DF_PIE_AON_PerfMonCtlLo7 regDF_PIE_AON_PerfMonCtlLo7; -typedef union DF_PIE_AON_PerfMonCtlLo8 regDF_PIE_AON_PerfMonCtlLo8; -typedef union DF_PIE_AON_PerfMonCtlLo9 regDF_PIE_AON_PerfMonCtlLo9; -typedef union DF_PIE_AON_PerfMonCtlLo10 regDF_PIE_AON_PerfMonCtlLo10; -typedef union DF_PIE_AON_PerfMonCtlLo11 regDF_PIE_AON_PerfMonCtlLo11; -typedef union DF_PIE_AON_PerfMonCtlLo12 regDF_PIE_AON_PerfMonCtlLo12; -typedef union DF_PIE_AON_PerfMonCtlLo13 regDF_PIE_AON_PerfMonCtlLo13; -typedef union DF_PIE_AON_PerfMonCtlLo14 regDF_PIE_AON_PerfMonCtlLo14; -typedef union DF_PIE_AON_PerfMonCtlLo15 regDF_PIE_AON_PerfMonCtlLo15; -typedef union DF_PIE_AON_PerfMonCtrHi0 regDF_PIE_AON_PerfMonCtrHi0; -typedef union DF_PIE_AON_PerfMonCtrHi1 regDF_PIE_AON_PerfMonCtrHi1; -typedef union DF_PIE_AON_PerfMonCtrHi2 regDF_PIE_AON_PerfMonCtrHi2; -typedef union DF_PIE_AON_PerfMonCtrHi3 regDF_PIE_AON_PerfMonCtrHi3; -typedef union DF_PIE_AON_PerfMonCtrHi4 regDF_PIE_AON_PerfMonCtrHi4; -typedef union DF_PIE_AON_PerfMonCtrHi5 regDF_PIE_AON_PerfMonCtrHi5; -typedef union DF_PIE_AON_PerfMonCtrHi6 regDF_PIE_AON_PerfMonCtrHi6; -typedef union DF_PIE_AON_PerfMonCtrHi7 regDF_PIE_AON_PerfMonCtrHi7; -typedef union DF_PIE_AON_PerfMonCtrHi8 regDF_PIE_AON_PerfMonCtrHi8; -typedef union DF_PIE_AON_PerfMonCtrHi9 regDF_PIE_AON_PerfMonCtrHi9; -typedef union DF_PIE_AON_PerfMonCtrHi10 regDF_PIE_AON_PerfMonCtrHi10; -typedef union 
DF_PIE_AON_PerfMonCtrHi11 regDF_PIE_AON_PerfMonCtrHi11; -typedef union DF_PIE_AON_PerfMonCtrHi12 regDF_PIE_AON_PerfMonCtrHi12; -typedef union DF_PIE_AON_PerfMonCtrHi13 regDF_PIE_AON_PerfMonCtrHi13; -typedef union DF_PIE_AON_PerfMonCtrHi14 regDF_PIE_AON_PerfMonCtrHi14; -typedef union DF_PIE_AON_PerfMonCtrHi15 regDF_PIE_AON_PerfMonCtrHi15; -typedef union DF_PIE_AON_PerfMonCtrLo0 regDF_PIE_AON_PerfMonCtrLo0; -typedef union DF_PIE_AON_PerfMonCtrLo1 regDF_PIE_AON_PerfMonCtrLo1; -typedef union DF_PIE_AON_PerfMonCtrLo2 regDF_PIE_AON_PerfMonCtrLo2; -typedef union DF_PIE_AON_PerfMonCtrLo3 regDF_PIE_AON_PerfMonCtrLo3; -typedef union DF_PIE_AON_PerfMonCtrLo4 regDF_PIE_AON_PerfMonCtrLo4; -typedef union DF_PIE_AON_PerfMonCtrLo5 regDF_PIE_AON_PerfMonCtrLo5; -typedef union DF_PIE_AON_PerfMonCtrLo6 regDF_PIE_AON_PerfMonCtrLo6; -typedef union DF_PIE_AON_PerfMonCtrLo7 regDF_PIE_AON_PerfMonCtrLo7; -typedef union DF_PIE_AON_PerfMonCtrLo8 regDF_PIE_AON_PerfMonCtrLo8; -typedef union DF_PIE_AON_PerfMonCtrLo9 regDF_PIE_AON_PerfMonCtrLo9; -typedef union DF_PIE_AON_PerfMonCtrLo10 regDF_PIE_AON_PerfMonCtrLo10; -typedef union DF_PIE_AON_PerfMonCtrLo11 regDF_PIE_AON_PerfMonCtrLo11; -typedef union DF_PIE_AON_PerfMonCtrLo12 regDF_PIE_AON_PerfMonCtrLo12; -typedef union DF_PIE_AON_PerfMonCtrLo13 regDF_PIE_AON_PerfMonCtrLo13; -typedef union DF_PIE_AON_PerfMonCtrLo14 regDF_PIE_AON_PerfMonCtrLo14; -typedef union DF_PIE_AON_PerfMonCtrLo15 regDF_PIE_AON_PerfMonCtrLo15; -typedef union GB_ADDR_CONFIG regGB_ADDR_CONFIG; -typedef union GB_ADDR_CONFIG_READ regGB_ADDR_CONFIG_READ; -typedef union GCEA_PERFCOUNTER0_CFG regGCEA_PERFCOUNTER0_CFG; -typedef union GCEA_PERFCOUNTER1_CFG regGCEA_PERFCOUNTER1_CFG; -typedef union GCEA_PERFCOUNTER2_HI regGCEA_PERFCOUNTER2_HI; -typedef union GCEA_PERFCOUNTER2_LO regGCEA_PERFCOUNTER2_LO; -typedef union GCEA_PERFCOUNTER2_MODE regGCEA_PERFCOUNTER2_MODE; -typedef union GCEA_PERFCOUNTER2_SELECT regGCEA_PERFCOUNTER2_SELECT; -typedef union GCEA_PERFCOUNTER2_SELECT1 
regGCEA_PERFCOUNTER2_SELECT1; -typedef union GCEA_PERFCOUNTER_HI regGCEA_PERFCOUNTER_HI; -typedef union GCEA_PERFCOUNTER_LO regGCEA_PERFCOUNTER_LO; -typedef union GCEA_PERFCOUNTER_RSLT_CNTL regGCEA_PERFCOUNTER_RSLT_CNTL; -typedef union GCMC_VM_L2_PERFCOUNTER0_CFG regGCMC_VM_L2_PERFCOUNTER0_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER1_CFG regGCMC_VM_L2_PERFCOUNTER1_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER2_CFG regGCMC_VM_L2_PERFCOUNTER2_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER3_CFG regGCMC_VM_L2_PERFCOUNTER3_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER4_CFG regGCMC_VM_L2_PERFCOUNTER4_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER5_CFG regGCMC_VM_L2_PERFCOUNTER5_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER6_CFG regGCMC_VM_L2_PERFCOUNTER6_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER7_CFG regGCMC_VM_L2_PERFCOUNTER7_CFG; -typedef union GCMC_VM_L2_PERFCOUNTER_HI regGCMC_VM_L2_PERFCOUNTER_HI; -typedef union GCMC_VM_L2_PERFCOUNTER_LO regGCMC_VM_L2_PERFCOUNTER_LO; -typedef union GCMC_VM_L2_PERFCOUNTER_RSLT_CNTL regGCMC_VM_L2_PERFCOUNTER_RSLT_CNTL; -typedef union GCR_PERFCOUNTER0_HI regGCR_PERFCOUNTER0_HI; -typedef union GCR_PERFCOUNTER0_LO regGCR_PERFCOUNTER0_LO; -typedef union GCR_PERFCOUNTER0_SELECT regGCR_PERFCOUNTER0_SELECT; -typedef union GCR_PERFCOUNTER0_SELECT1 regGCR_PERFCOUNTER0_SELECT1; -typedef union GCR_PERFCOUNTER1_HI regGCR_PERFCOUNTER1_HI; -typedef union GCR_PERFCOUNTER1_LO regGCR_PERFCOUNTER1_LO; -typedef union GCR_PERFCOUNTER1_SELECT regGCR_PERFCOUNTER1_SELECT; -typedef union GCVML2_PERFCOUNTER2_0_HI regGCVML2_PERFCOUNTER2_0_HI; -typedef union GCVML2_PERFCOUNTER2_0_LO regGCVML2_PERFCOUNTER2_0_LO; -typedef union GCVML2_PERFCOUNTER2_0_MODE regGCVML2_PERFCOUNTER2_0_MODE; -typedef union GCVML2_PERFCOUNTER2_0_SELECT regGCVML2_PERFCOUNTER2_0_SELECT; -typedef union GCVML2_PERFCOUNTER2_0_SELECT1 regGCVML2_PERFCOUNTER2_0_SELECT1; -typedef union GCVML2_PERFCOUNTER2_1_HI regGCVML2_PERFCOUNTER2_1_HI; -typedef union GCVML2_PERFCOUNTER2_1_LO regGCVML2_PERFCOUNTER2_1_LO; 
-typedef union GCVML2_PERFCOUNTER2_1_MODE regGCVML2_PERFCOUNTER2_1_MODE; -typedef union GCVML2_PERFCOUNTER2_1_SELECT regGCVML2_PERFCOUNTER2_1_SELECT; -typedef union GCVML2_PERFCOUNTER2_1_SELECT1 regGCVML2_PERFCOUNTER2_1_SELECT1; -typedef union GC_ATC_L2_PERFCOUNTER0_CFG regGC_ATC_L2_PERFCOUNTER0_CFG; -typedef union GC_ATC_L2_PERFCOUNTER1_CFG regGC_ATC_L2_PERFCOUNTER1_CFG; -typedef union GC_ATC_L2_PERFCOUNTER2_HI regGC_ATC_L2_PERFCOUNTER2_HI; -typedef union GC_ATC_L2_PERFCOUNTER2_LO regGC_ATC_L2_PERFCOUNTER2_LO; -typedef union GC_ATC_L2_PERFCOUNTER2_MODE regGC_ATC_L2_PERFCOUNTER2_MODE; -typedef union GC_ATC_L2_PERFCOUNTER2_SELECT regGC_ATC_L2_PERFCOUNTER2_SELECT; -typedef union GC_ATC_L2_PERFCOUNTER2_SELECT1 regGC_ATC_L2_PERFCOUNTER2_SELECT1; -typedef union GC_ATC_L2_PERFCOUNTER_HI regGC_ATC_L2_PERFCOUNTER_HI; -typedef union GC_ATC_L2_PERFCOUNTER_LO regGC_ATC_L2_PERFCOUNTER_LO; -typedef union GC_ATC_L2_PERFCOUNTER_RSLT_CNTL regGC_ATC_L2_PERFCOUNTER_RSLT_CNTL; -typedef union GDS_PERFCOUNTER0_HI regGDS_PERFCOUNTER0_HI; -typedef union GDS_PERFCOUNTER0_LO regGDS_PERFCOUNTER0_LO; -typedef union GDS_PERFCOUNTER0_SELECT regGDS_PERFCOUNTER0_SELECT; -typedef union GDS_PERFCOUNTER0_SELECT1 regGDS_PERFCOUNTER0_SELECT1; -typedef union GDS_PERFCOUNTER1_HI regGDS_PERFCOUNTER1_HI; -typedef union GDS_PERFCOUNTER1_LO regGDS_PERFCOUNTER1_LO; -typedef union GDS_PERFCOUNTER1_SELECT regGDS_PERFCOUNTER1_SELECT; -typedef union GDS_PERFCOUNTER1_SELECT1 regGDS_PERFCOUNTER1_SELECT1; -typedef union GDS_PERFCOUNTER2_HI regGDS_PERFCOUNTER2_HI; -typedef union GDS_PERFCOUNTER2_LO regGDS_PERFCOUNTER2_LO; -typedef union GDS_PERFCOUNTER2_SELECT regGDS_PERFCOUNTER2_SELECT; -typedef union GDS_PERFCOUNTER2_SELECT1 regGDS_PERFCOUNTER2_SELECT1; -typedef union GDS_PERFCOUNTER3_HI regGDS_PERFCOUNTER3_HI; -typedef union GDS_PERFCOUNTER3_LO regGDS_PERFCOUNTER3_LO; -typedef union GDS_PERFCOUNTER3_SELECT regGDS_PERFCOUNTER3_SELECT; -typedef union GDS_PERFCOUNTER3_SELECT1 regGDS_PERFCOUNTER3_SELECT1; -typedef 
union GE1_PERFCOUNTER0_HI regGE1_PERFCOUNTER0_HI; -typedef union GE1_PERFCOUNTER0_LO regGE1_PERFCOUNTER0_LO; -typedef union GE1_PERFCOUNTER0_SELECT regGE1_PERFCOUNTER0_SELECT; -typedef union GE1_PERFCOUNTER0_SELECT1 regGE1_PERFCOUNTER0_SELECT1; -typedef union GE1_PERFCOUNTER1_HI regGE1_PERFCOUNTER1_HI; -typedef union GE1_PERFCOUNTER1_LO regGE1_PERFCOUNTER1_LO; -typedef union GE1_PERFCOUNTER1_SELECT regGE1_PERFCOUNTER1_SELECT; -typedef union GE1_PERFCOUNTER1_SELECT1 regGE1_PERFCOUNTER1_SELECT1; -typedef union GE1_PERFCOUNTER2_HI regGE1_PERFCOUNTER2_HI; -typedef union GE1_PERFCOUNTER2_LO regGE1_PERFCOUNTER2_LO; -typedef union GE1_PERFCOUNTER2_SELECT regGE1_PERFCOUNTER2_SELECT; -typedef union GE1_PERFCOUNTER2_SELECT1 regGE1_PERFCOUNTER2_SELECT1; -typedef union GE1_PERFCOUNTER3_HI regGE1_PERFCOUNTER3_HI; -typedef union GE1_PERFCOUNTER3_LO regGE1_PERFCOUNTER3_LO; -typedef union GE1_PERFCOUNTER3_SELECT regGE1_PERFCOUNTER3_SELECT; -typedef union GE1_PERFCOUNTER3_SELECT1 regGE1_PERFCOUNTER3_SELECT1; -typedef union GE2_DIST_PERFCOUNTER0_HI regGE2_DIST_PERFCOUNTER0_HI; -typedef union GE2_DIST_PERFCOUNTER0_LO regGE2_DIST_PERFCOUNTER0_LO; -typedef union GE2_DIST_PERFCOUNTER0_SELECT regGE2_DIST_PERFCOUNTER0_SELECT; -typedef union GE2_DIST_PERFCOUNTER0_SELECT1 regGE2_DIST_PERFCOUNTER0_SELECT1; -typedef union GE2_DIST_PERFCOUNTER1_HI regGE2_DIST_PERFCOUNTER1_HI; -typedef union GE2_DIST_PERFCOUNTER1_LO regGE2_DIST_PERFCOUNTER1_LO; -typedef union GE2_DIST_PERFCOUNTER1_SELECT regGE2_DIST_PERFCOUNTER1_SELECT; -typedef union GE2_DIST_PERFCOUNTER1_SELECT1 regGE2_DIST_PERFCOUNTER1_SELECT1; -typedef union GE2_DIST_PERFCOUNTER2_HI regGE2_DIST_PERFCOUNTER2_HI; -typedef union GE2_DIST_PERFCOUNTER2_LO regGE2_DIST_PERFCOUNTER2_LO; -typedef union GE2_DIST_PERFCOUNTER2_SELECT regGE2_DIST_PERFCOUNTER2_SELECT; -typedef union GE2_DIST_PERFCOUNTER2_SELECT1 regGE2_DIST_PERFCOUNTER2_SELECT1; -typedef union GE2_DIST_PERFCOUNTER3_HI regGE2_DIST_PERFCOUNTER3_HI; -typedef union GE2_DIST_PERFCOUNTER3_LO 
regGE2_DIST_PERFCOUNTER3_LO; -typedef union GE2_DIST_PERFCOUNTER3_SELECT regGE2_DIST_PERFCOUNTER3_SELECT; -typedef union GE2_DIST_PERFCOUNTER3_SELECT1 regGE2_DIST_PERFCOUNTER3_SELECT1; -typedef union GE2_SE_PERFCOUNTER0_HI regGE2_SE_PERFCOUNTER0_HI; -typedef union GE2_SE_PERFCOUNTER0_LO regGE2_SE_PERFCOUNTER0_LO; -typedef union GE2_SE_PERFCOUNTER0_SELECT regGE2_SE_PERFCOUNTER0_SELECT; -typedef union GE2_SE_PERFCOUNTER0_SELECT1 regGE2_SE_PERFCOUNTER0_SELECT1; -typedef union GE2_SE_PERFCOUNTER1_HI regGE2_SE_PERFCOUNTER1_HI; -typedef union GE2_SE_PERFCOUNTER1_LO regGE2_SE_PERFCOUNTER1_LO; -typedef union GE2_SE_PERFCOUNTER1_SELECT regGE2_SE_PERFCOUNTER1_SELECT; -typedef union GE2_SE_PERFCOUNTER1_SELECT1 regGE2_SE_PERFCOUNTER1_SELECT1; -typedef union GE2_SE_PERFCOUNTER2_HI regGE2_SE_PERFCOUNTER2_HI; -typedef union GE2_SE_PERFCOUNTER2_LO regGE2_SE_PERFCOUNTER2_LO; -typedef union GE2_SE_PERFCOUNTER2_SELECT regGE2_SE_PERFCOUNTER2_SELECT; -typedef union GE2_SE_PERFCOUNTER2_SELECT1 regGE2_SE_PERFCOUNTER2_SELECT1; -typedef union GE2_SE_PERFCOUNTER3_HI regGE2_SE_PERFCOUNTER3_HI; -typedef union GE2_SE_PERFCOUNTER3_LO regGE2_SE_PERFCOUNTER3_LO; -typedef union GE2_SE_PERFCOUNTER3_SELECT regGE2_SE_PERFCOUNTER3_SELECT; -typedef union GE2_SE_PERFCOUNTER3_SELECT1 regGE2_SE_PERFCOUNTER3_SELECT1; -typedef union GE_CNTL regGE_CNTL; -typedef union GE_DMA_FIRST_INDEX regGE_DMA_FIRST_INDEX; -typedef union GE_FAST_CLKS regGE_FAST_CLKS; -#if CHIP_HDR_NAVI32 -typedef union GE_FED_STATUS regGE_FED_STATUS; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union GE_GS_FAST_LAUNCH_WG_DIM regGE_GS_FAST_LAUNCH_WG_DIM; -typedef union GE_GS_FAST_LAUNCH_WG_DIM_1 regGE_GS_FAST_LAUNCH_WG_DIM_1; -#endif -typedef union GE_INDX_OFFSET regGE_INDX_OFFSET; -typedef union GE_MAX_OUTPUT_PER_SUBGROUP regGE_MAX_OUTPUT_PER_SUBGROUP; -typedef union GE_MAX_VTX_INDX regGE_MAX_VTX_INDX; -typedef union GE_MIN_VTX_INDX regGE_MIN_VTX_INDX; -typedef union 
GE_MULTI_PRIM_IB_RESET_EN regGE_MULTI_PRIM_IB_RESET_EN; -typedef union GE_NGG_SUBGRP_CNTL regGE_NGG_SUBGRP_CNTL; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union GE_PA_IF_SAFE_REG regGE_PA_IF_SAFE_REG; -#endif -typedef union GE_PC_ALLOC regGE_PC_ALLOC; -typedef union GE_PC_CNTL regGE_PC_CNTL; -typedef union GE_PERFCOUNTER0_HI regGE_PERFCOUNTER0_HI; -typedef union GE_PERFCOUNTER0_LO regGE_PERFCOUNTER0_LO; -typedef union GE_PERFCOUNTER0_SELECT regGE_PERFCOUNTER0_SELECT; -typedef union GE_PERFCOUNTER0_SELECT1 regGE_PERFCOUNTER0_SELECT1; -typedef union GE_PERFCOUNTER1_HI regGE_PERFCOUNTER1_HI; -typedef union GE_PERFCOUNTER1_LO regGE_PERFCOUNTER1_LO; -typedef union GE_PERFCOUNTER1_SELECT regGE_PERFCOUNTER1_SELECT; -typedef union GE_PERFCOUNTER1_SELECT1 regGE_PERFCOUNTER1_SELECT1; -typedef union GE_PERFCOUNTER2_HI regGE_PERFCOUNTER2_HI; -typedef union GE_PERFCOUNTER2_LO regGE_PERFCOUNTER2_LO; -typedef union GE_PERFCOUNTER2_SELECT regGE_PERFCOUNTER2_SELECT; -typedef union GE_PERFCOUNTER2_SELECT1 regGE_PERFCOUNTER2_SELECT1; -typedef union GE_PERFCOUNTER3_HI regGE_PERFCOUNTER3_HI; -typedef union GE_PERFCOUNTER3_LO regGE_PERFCOUNTER3_LO; -typedef union GE_PERFCOUNTER3_SELECT regGE_PERFCOUNTER3_SELECT; -typedef union GE_PERFCOUNTER3_SELECT1 regGE_PERFCOUNTER3_SELECT1; -typedef union GE_PERFCOUNTER4_HI regGE_PERFCOUNTER4_HI; -typedef union GE_PERFCOUNTER4_LO regGE_PERFCOUNTER4_LO; -typedef union GE_PERFCOUNTER4_SELECT regGE_PERFCOUNTER4_SELECT; -typedef union GE_PERFCOUNTER5_HI regGE_PERFCOUNTER5_HI; -typedef union GE_PERFCOUNTER5_LO regGE_PERFCOUNTER5_LO; -typedef union GE_PERFCOUNTER5_SELECT regGE_PERFCOUNTER5_SELECT; -typedef union GE_PERFCOUNTER6_HI regGE_PERFCOUNTER6_HI; -typedef union GE_PERFCOUNTER6_LO regGE_PERFCOUNTER6_LO; -typedef union GE_PERFCOUNTER6_SELECT regGE_PERFCOUNTER6_SELECT; -typedef union GE_PERFCOUNTER7_HI regGE_PERFCOUNTER7_HI; -typedef union GE_PERFCOUNTER7_LO regGE_PERFCOUNTER7_LO; -typedef union 
GE_PERFCOUNTER7_SELECT regGE_PERFCOUNTER7_SELECT; -typedef union GE_PERFCOUNTER8_HI regGE_PERFCOUNTER8_HI; -typedef union GE_PERFCOUNTER8_LO regGE_PERFCOUNTER8_LO; -typedef union GE_PERFCOUNTER8_SELECT regGE_PERFCOUNTER8_SELECT; -typedef union GE_PERFCOUNTER9_HI regGE_PERFCOUNTER9_HI; -typedef union GE_PERFCOUNTER9_LO regGE_PERFCOUNTER9_LO; -typedef union GE_PERFCOUNTER9_SELECT regGE_PERFCOUNTER9_SELECT; -typedef union GE_PERFCOUNTER10_HI regGE_PERFCOUNTER10_HI; -typedef union GE_PERFCOUNTER10_LO regGE_PERFCOUNTER10_LO; -typedef union GE_PERFCOUNTER10_SELECT regGE_PERFCOUNTER10_SELECT; -typedef union GE_PERFCOUNTER11_HI regGE_PERFCOUNTER11_HI; -typedef union GE_PERFCOUNTER11_LO regGE_PERFCOUNTER11_LO; -typedef union GE_PERFCOUNTER11_SELECT regGE_PERFCOUNTER11_SELECT; -typedef union GE_PRIV_CONTROL regGE_PRIV_CONTROL; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union GE_RATE_CNTL_1 regGE_RATE_CNTL_1; -typedef union GE_RATE_CNTL_2 regGE_RATE_CNTL_2; -#endif -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union GE_SPI_IF_SAFE_REG regGE_SPI_IF_SAFE_REG; -#endif -typedef union GE_STATUS regGE_STATUS; -typedef union GE_STEREO_CNTL regGE_STEREO_CNTL; -typedef union GE_USER_VGPR1 regGE_USER_VGPR1; -typedef union GE_USER_VGPR2 regGE_USER_VGPR2; -typedef union GE_USER_VGPR3 regGE_USER_VGPR3; -typedef union GE_USER_VGPR_EN regGE_USER_VGPR_EN; -typedef union GE_VRS_RATE regGE_VRS_RATE; -typedef union GL1A_PERFCOUNTER0_HI regGL1A_PERFCOUNTER0_HI; -typedef union GL1A_PERFCOUNTER0_LO regGL1A_PERFCOUNTER0_LO; -typedef union GL1A_PERFCOUNTER0_SELECT regGL1A_PERFCOUNTER0_SELECT; -typedef union GL1A_PERFCOUNTER0_SELECT1 regGL1A_PERFCOUNTER0_SELECT1; -typedef union GL1A_PERFCOUNTER1_HI regGL1A_PERFCOUNTER1_HI; -typedef union GL1A_PERFCOUNTER1_LO regGL1A_PERFCOUNTER1_LO; -typedef union GL1A_PERFCOUNTER1_SELECT regGL1A_PERFCOUNTER1_SELECT; -typedef union GL1A_PERFCOUNTER2_HI regGL1A_PERFCOUNTER2_HI; -typedef 
union GL1A_PERFCOUNTER2_LO regGL1A_PERFCOUNTER2_LO; -typedef union GL1A_PERFCOUNTER2_SELECT regGL1A_PERFCOUNTER2_SELECT; -typedef union GL1A_PERFCOUNTER3_HI regGL1A_PERFCOUNTER3_HI; -typedef union GL1A_PERFCOUNTER3_LO regGL1A_PERFCOUNTER3_LO; -typedef union GL1A_PERFCOUNTER3_SELECT regGL1A_PERFCOUNTER3_SELECT; -typedef union GL1C_PERFCOUNTER0_HI regGL1C_PERFCOUNTER0_HI; -typedef union GL1C_PERFCOUNTER0_LO regGL1C_PERFCOUNTER0_LO; -typedef union GL1C_PERFCOUNTER0_SELECT regGL1C_PERFCOUNTER0_SELECT; -typedef union GL1C_PERFCOUNTER0_SELECT1 regGL1C_PERFCOUNTER0_SELECT1; -typedef union GL1C_PERFCOUNTER1_HI regGL1C_PERFCOUNTER1_HI; -typedef union GL1C_PERFCOUNTER1_LO regGL1C_PERFCOUNTER1_LO; -typedef union GL1C_PERFCOUNTER1_SELECT regGL1C_PERFCOUNTER1_SELECT; -typedef union GL1C_PERFCOUNTER2_HI regGL1C_PERFCOUNTER2_HI; -typedef union GL1C_PERFCOUNTER2_LO regGL1C_PERFCOUNTER2_LO; -typedef union GL1C_PERFCOUNTER2_SELECT regGL1C_PERFCOUNTER2_SELECT; -typedef union GL1C_PERFCOUNTER3_HI regGL1C_PERFCOUNTER3_HI; -typedef union GL1C_PERFCOUNTER3_LO regGL1C_PERFCOUNTER3_LO; -typedef union GL1C_PERFCOUNTER3_SELECT regGL1C_PERFCOUNTER3_SELECT; -typedef union GL2A_PERFCOUNTER0_HI regGL2A_PERFCOUNTER0_HI; -typedef union GL2A_PERFCOUNTER0_LO regGL2A_PERFCOUNTER0_LO; -typedef union GL2A_PERFCOUNTER0_SELECT regGL2A_PERFCOUNTER0_SELECT; -typedef union GL2A_PERFCOUNTER0_SELECT1 regGL2A_PERFCOUNTER0_SELECT1; -typedef union GL2A_PERFCOUNTER1_HI regGL2A_PERFCOUNTER1_HI; -typedef union GL2A_PERFCOUNTER1_LO regGL2A_PERFCOUNTER1_LO; -typedef union GL2A_PERFCOUNTER1_SELECT regGL2A_PERFCOUNTER1_SELECT; -typedef union GL2A_PERFCOUNTER1_SELECT1 regGL2A_PERFCOUNTER1_SELECT1; -typedef union GL2A_PERFCOUNTER2_HI regGL2A_PERFCOUNTER2_HI; -typedef union GL2A_PERFCOUNTER2_LO regGL2A_PERFCOUNTER2_LO; -typedef union GL2A_PERFCOUNTER2_SELECT regGL2A_PERFCOUNTER2_SELECT; -typedef union GL2A_PERFCOUNTER3_HI regGL2A_PERFCOUNTER3_HI; -typedef union GL2A_PERFCOUNTER3_LO regGL2A_PERFCOUNTER3_LO; -typedef union 
GL2A_PERFCOUNTER3_SELECT regGL2A_PERFCOUNTER3_SELECT; -typedef union GL2C_PERFCOUNTER0_HI regGL2C_PERFCOUNTER0_HI; -typedef union GL2C_PERFCOUNTER0_LO regGL2C_PERFCOUNTER0_LO; -typedef union GL2C_PERFCOUNTER0_SELECT regGL2C_PERFCOUNTER0_SELECT; -typedef union GL2C_PERFCOUNTER0_SELECT1 regGL2C_PERFCOUNTER0_SELECT1; -typedef union GL2C_PERFCOUNTER1_HI regGL2C_PERFCOUNTER1_HI; -typedef union GL2C_PERFCOUNTER1_LO regGL2C_PERFCOUNTER1_LO; -typedef union GL2C_PERFCOUNTER1_SELECT regGL2C_PERFCOUNTER1_SELECT; -typedef union GL2C_PERFCOUNTER1_SELECT1 regGL2C_PERFCOUNTER1_SELECT1; -typedef union GL2C_PERFCOUNTER2_HI regGL2C_PERFCOUNTER2_HI; -typedef union GL2C_PERFCOUNTER2_LO regGL2C_PERFCOUNTER2_LO; -typedef union GL2C_PERFCOUNTER2_SELECT regGL2C_PERFCOUNTER2_SELECT; -typedef union GL2C_PERFCOUNTER3_HI regGL2C_PERFCOUNTER3_HI; -typedef union GL2C_PERFCOUNTER3_LO regGL2C_PERFCOUNTER3_LO; -typedef union GL2C_PERFCOUNTER3_SELECT regGL2C_PERFCOUNTER3_SELECT; -typedef union GRBM_CHIP_REVISION regGRBM_CHIP_REVISION; -typedef union GRBM_GFX_INDEX regGRBM_GFX_INDEX; -typedef union GRBM_GFX_INDEX_SR_DATA regGRBM_GFX_INDEX_SR_DATA; -typedef union GRBM_GFX_INDEX_SR_SELECT regGRBM_GFX_INDEX_SR_SELECT; -typedef union GRBM_PERFCOUNTER0_HI regGRBM_PERFCOUNTER0_HI; -typedef union GRBM_PERFCOUNTER0_LO regGRBM_PERFCOUNTER0_LO; -typedef union GRBM_PERFCOUNTER0_SELECT regGRBM_PERFCOUNTER0_SELECT; -typedef union GRBM_PERFCOUNTER0_SELECT_HI regGRBM_PERFCOUNTER0_SELECT_HI; -typedef union GRBM_PERFCOUNTER1_HI regGRBM_PERFCOUNTER1_HI; -typedef union GRBM_PERFCOUNTER1_LO regGRBM_PERFCOUNTER1_LO; -typedef union GRBM_PERFCOUNTER1_SELECT regGRBM_PERFCOUNTER1_SELECT; -typedef union GRBM_PERFCOUNTER1_SELECT_HI regGRBM_PERFCOUNTER1_SELECT_HI; -typedef union GRBM_SE0_PERFCOUNTER_HI regGRBM_SE0_PERFCOUNTER_HI; -typedef union GRBM_SE0_PERFCOUNTER_LO regGRBM_SE0_PERFCOUNTER_LO; -typedef union GRBM_SE0_PERFCOUNTER_SELECT regGRBM_SE0_PERFCOUNTER_SELECT; -typedef union GRBM_SE1_PERFCOUNTER_HI 
regGRBM_SE1_PERFCOUNTER_HI; -typedef union GRBM_SE1_PERFCOUNTER_LO regGRBM_SE1_PERFCOUNTER_LO; -typedef union GRBM_SE1_PERFCOUNTER_SELECT regGRBM_SE1_PERFCOUNTER_SELECT; -typedef union GRBM_SE2_PERFCOUNTER_HI regGRBM_SE2_PERFCOUNTER_HI; -typedef union GRBM_SE2_PERFCOUNTER_LO regGRBM_SE2_PERFCOUNTER_LO; -typedef union GRBM_SE2_PERFCOUNTER_SELECT regGRBM_SE2_PERFCOUNTER_SELECT; -typedef union GRBM_SE3_PERFCOUNTER_HI regGRBM_SE3_PERFCOUNTER_HI; -typedef union GRBM_SE3_PERFCOUNTER_LO regGRBM_SE3_PERFCOUNTER_LO; -typedef union GRBM_SE3_PERFCOUNTER_SELECT regGRBM_SE3_PERFCOUNTER_SELECT; -#if CHIP_HDR_NAVI31 -typedef union GRBM_SE4_PERFCOUNTER_HI regGRBM_SE4_PERFCOUNTER_HI; -typedef union GRBM_SE4_PERFCOUNTER_LO regGRBM_SE4_PERFCOUNTER_LO; -typedef union GRBM_SE4_PERFCOUNTER_SELECT regGRBM_SE4_PERFCOUNTER_SELECT; -typedef union GRBM_SE5_PERFCOUNTER_HI regGRBM_SE5_PERFCOUNTER_HI; -typedef union GRBM_SE5_PERFCOUNTER_LO regGRBM_SE5_PERFCOUNTER_LO; -typedef union GRBM_SE5_PERFCOUNTER_SELECT regGRBM_SE5_PERFCOUNTER_SELECT; -typedef union GRBM_SE6_PERFCOUNTER_HI regGRBM_SE6_PERFCOUNTER_HI; -typedef union GRBM_SE6_PERFCOUNTER_LO regGRBM_SE6_PERFCOUNTER_LO; -typedef union GRBM_SE6_PERFCOUNTER_SELECT regGRBM_SE6_PERFCOUNTER_SELECT; -#endif -typedef union GUS_PERFCOUNTER0_CFG regGUS_PERFCOUNTER0_CFG; -typedef union GUS_PERFCOUNTER1_CFG regGUS_PERFCOUNTER1_CFG; -typedef union GUS_PERFCOUNTER2_HI regGUS_PERFCOUNTER2_HI; -typedef union GUS_PERFCOUNTER2_LO regGUS_PERFCOUNTER2_LO; -typedef union GUS_PERFCOUNTER2_MODE regGUS_PERFCOUNTER2_MODE; -typedef union GUS_PERFCOUNTER2_SELECT regGUS_PERFCOUNTER2_SELECT; -typedef union GUS_PERFCOUNTER2_SELECT1 regGUS_PERFCOUNTER2_SELECT1; -typedef union GUS_PERFCOUNTER_HI regGUS_PERFCOUNTER_HI; -typedef union GUS_PERFCOUNTER_LO regGUS_PERFCOUNTER_LO; -typedef union GUS_PERFCOUNTER_RSLT_CNTL regGUS_PERFCOUNTER_RSLT_CNTL; -typedef union IA_CNTL_STATUS regIA_CNTL_STATUS; -typedef union IA_ENHANCE regIA_ENHANCE; -typedef union IA_MULTI_VGT_PARAM 
regIA_MULTI_VGT_PARAM; -typedef union IA_MULTI_VGT_PARAM_BC regIA_MULTI_VGT_PARAM_BC; -typedef union IA_MULTI_VGT_PARAM_PIPED regIA_MULTI_VGT_PARAM_PIPED; -typedef union IA_PERFCOUNTER0_HI regIA_PERFCOUNTER0_HI; -typedef union IA_PERFCOUNTER0_LO regIA_PERFCOUNTER0_LO; -typedef union IA_PERFCOUNTER0_SELECT regIA_PERFCOUNTER0_SELECT; -typedef union IA_PERFCOUNTER0_SELECT1 regIA_PERFCOUNTER0_SELECT1; -typedef union IA_PERFCOUNTER1_HI regIA_PERFCOUNTER1_HI; -typedef union IA_PERFCOUNTER1_LO regIA_PERFCOUNTER1_LO; -typedef union IA_PERFCOUNTER1_SELECT regIA_PERFCOUNTER1_SELECT; -typedef union IA_PERFCOUNTER2_HI regIA_PERFCOUNTER2_HI; -typedef union IA_PERFCOUNTER2_LO regIA_PERFCOUNTER2_LO; -typedef union IA_PERFCOUNTER2_SELECT regIA_PERFCOUNTER2_SELECT; -typedef union IA_PERFCOUNTER3_HI regIA_PERFCOUNTER3_HI; -typedef union IA_PERFCOUNTER3_LO regIA_PERFCOUNTER3_LO; -typedef union IA_PERFCOUNTER3_SELECT regIA_PERFCOUNTER3_SELECT; -typedef union IA_UTCL1_CNTL regIA_UTCL1_CNTL; -typedef union IA_UTCL1_STATUS regIA_UTCL1_STATUS; -typedef union IA_UTCL1_STATUS_2 regIA_UTCL1_STATUS_2; -typedef union MC_VM_L2_PERFCOUNTER0_CFG regMC_VM_L2_PERFCOUNTER0_CFG; -typedef union MC_VM_L2_PERFCOUNTER1_CFG regMC_VM_L2_PERFCOUNTER1_CFG; -typedef union MC_VM_L2_PERFCOUNTER2_CFG regMC_VM_L2_PERFCOUNTER2_CFG; -typedef union MC_VM_L2_PERFCOUNTER3_CFG regMC_VM_L2_PERFCOUNTER3_CFG; -typedef union MC_VM_L2_PERFCOUNTER4_CFG regMC_VM_L2_PERFCOUNTER4_CFG; -typedef union MC_VM_L2_PERFCOUNTER5_CFG regMC_VM_L2_PERFCOUNTER5_CFG; -typedef union MC_VM_L2_PERFCOUNTER6_CFG regMC_VM_L2_PERFCOUNTER6_CFG; -typedef union MC_VM_L2_PERFCOUNTER7_CFG regMC_VM_L2_PERFCOUNTER7_CFG; -typedef union MC_VM_L2_PERFCOUNTER_HI regMC_VM_L2_PERFCOUNTER_HI; -typedef union MC_VM_L2_PERFCOUNTER_LO regMC_VM_L2_PERFCOUNTER_LO; -typedef union MC_VM_L2_PERFCOUNTER_RSLT_CNTL regMC_VM_L2_PERFCOUNTER_RSLT_CNTL; -typedef union MP1_SMN_FPS_CNT regMP1_SMN_FPS_CNT; -typedef union PA_CL_CLIP_CNTL regPA_CL_CLIP_CNTL; -typedef union 
PA_CL_CNTL_STATUS regPA_CL_CNTL_STATUS; -typedef union PA_CL_ENHANCE regPA_CL_ENHANCE; -typedef union PA_CL_GB_HORZ_CLIP_ADJ regPA_CL_GB_HORZ_CLIP_ADJ; -typedef union PA_CL_GB_HORZ_DISC_ADJ regPA_CL_GB_HORZ_DISC_ADJ; -typedef union PA_CL_GB_VERT_CLIP_ADJ regPA_CL_GB_VERT_CLIP_ADJ; -typedef union PA_CL_GB_VERT_DISC_ADJ regPA_CL_GB_VERT_DISC_ADJ; -typedef union PA_CL_NANINF_CNTL regPA_CL_NANINF_CNTL; -typedef union PA_CL_NGG_CNTL regPA_CL_NGG_CNTL; -typedef union PA_CL_POINT_CULL_RAD regPA_CL_POINT_CULL_RAD; -typedef union PA_CL_POINT_SIZE regPA_CL_POINT_SIZE; -typedef union PA_CL_POINT_X_RAD regPA_CL_POINT_X_RAD; -typedef union PA_CL_POINT_Y_RAD regPA_CL_POINT_Y_RAD; -typedef union PA_CL_PROG_NEAR_CLIP_Z regPA_CL_PROG_NEAR_CLIP_Z; -typedef union PA_CL_UCP_0_W regPA_CL_UCP_0_W; -typedef union PA_CL_UCP_0_X regPA_CL_UCP_0_X; -typedef union PA_CL_UCP_0_Y regPA_CL_UCP_0_Y; -typedef union PA_CL_UCP_0_Z regPA_CL_UCP_0_Z; -typedef union PA_CL_UCP_1_W regPA_CL_UCP_1_W; -typedef union PA_CL_UCP_1_X regPA_CL_UCP_1_X; -typedef union PA_CL_UCP_1_Y regPA_CL_UCP_1_Y; -typedef union PA_CL_UCP_1_Z regPA_CL_UCP_1_Z; -typedef union PA_CL_UCP_2_W regPA_CL_UCP_2_W; -typedef union PA_CL_UCP_2_X regPA_CL_UCP_2_X; -typedef union PA_CL_UCP_2_Y regPA_CL_UCP_2_Y; -typedef union PA_CL_UCP_2_Z regPA_CL_UCP_2_Z; -typedef union PA_CL_UCP_3_W regPA_CL_UCP_3_W; -typedef union PA_CL_UCP_3_X regPA_CL_UCP_3_X; -typedef union PA_CL_UCP_3_Y regPA_CL_UCP_3_Y; -typedef union PA_CL_UCP_3_Z regPA_CL_UCP_3_Z; -typedef union PA_CL_UCP_4_W regPA_CL_UCP_4_W; -typedef union PA_CL_UCP_4_X regPA_CL_UCP_4_X; -typedef union PA_CL_UCP_4_Y regPA_CL_UCP_4_Y; -typedef union PA_CL_UCP_4_Z regPA_CL_UCP_4_Z; -typedef union PA_CL_UCP_5_W regPA_CL_UCP_5_W; -typedef union PA_CL_UCP_5_X regPA_CL_UCP_5_X; -typedef union PA_CL_UCP_5_Y regPA_CL_UCP_5_Y; -typedef union PA_CL_UCP_5_Z regPA_CL_UCP_5_Z; -typedef union PA_CL_VPORT_XOFFSET regPA_CL_VPORT_XOFFSET; -typedef union PA_CL_VPORT_XOFFSET_1 regPA_CL_VPORT_XOFFSET_1; -typedef 
union PA_CL_VPORT_XOFFSET_2 regPA_CL_VPORT_XOFFSET_2; -typedef union PA_CL_VPORT_XOFFSET_3 regPA_CL_VPORT_XOFFSET_3; -typedef union PA_CL_VPORT_XOFFSET_4 regPA_CL_VPORT_XOFFSET_4; -typedef union PA_CL_VPORT_XOFFSET_5 regPA_CL_VPORT_XOFFSET_5; -typedef union PA_CL_VPORT_XOFFSET_6 regPA_CL_VPORT_XOFFSET_6; -typedef union PA_CL_VPORT_XOFFSET_7 regPA_CL_VPORT_XOFFSET_7; -typedef union PA_CL_VPORT_XOFFSET_8 regPA_CL_VPORT_XOFFSET_8; -typedef union PA_CL_VPORT_XOFFSET_9 regPA_CL_VPORT_XOFFSET_9; -typedef union PA_CL_VPORT_XOFFSET_10 regPA_CL_VPORT_XOFFSET_10; -typedef union PA_CL_VPORT_XOFFSET_11 regPA_CL_VPORT_XOFFSET_11; -typedef union PA_CL_VPORT_XOFFSET_12 regPA_CL_VPORT_XOFFSET_12; -typedef union PA_CL_VPORT_XOFFSET_13 regPA_CL_VPORT_XOFFSET_13; -typedef union PA_CL_VPORT_XOFFSET_14 regPA_CL_VPORT_XOFFSET_14; -typedef union PA_CL_VPORT_XOFFSET_15 regPA_CL_VPORT_XOFFSET_15; -typedef union PA_CL_VPORT_XSCALE regPA_CL_VPORT_XSCALE; -typedef union PA_CL_VPORT_XSCALE_1 regPA_CL_VPORT_XSCALE_1; -typedef union PA_CL_VPORT_XSCALE_2 regPA_CL_VPORT_XSCALE_2; -typedef union PA_CL_VPORT_XSCALE_3 regPA_CL_VPORT_XSCALE_3; -typedef union PA_CL_VPORT_XSCALE_4 regPA_CL_VPORT_XSCALE_4; -typedef union PA_CL_VPORT_XSCALE_5 regPA_CL_VPORT_XSCALE_5; -typedef union PA_CL_VPORT_XSCALE_6 regPA_CL_VPORT_XSCALE_6; -typedef union PA_CL_VPORT_XSCALE_7 regPA_CL_VPORT_XSCALE_7; -typedef union PA_CL_VPORT_XSCALE_8 regPA_CL_VPORT_XSCALE_8; -typedef union PA_CL_VPORT_XSCALE_9 regPA_CL_VPORT_XSCALE_9; -typedef union PA_CL_VPORT_XSCALE_10 regPA_CL_VPORT_XSCALE_10; -typedef union PA_CL_VPORT_XSCALE_11 regPA_CL_VPORT_XSCALE_11; -typedef union PA_CL_VPORT_XSCALE_12 regPA_CL_VPORT_XSCALE_12; -typedef union PA_CL_VPORT_XSCALE_13 regPA_CL_VPORT_XSCALE_13; -typedef union PA_CL_VPORT_XSCALE_14 regPA_CL_VPORT_XSCALE_14; -typedef union PA_CL_VPORT_XSCALE_15 regPA_CL_VPORT_XSCALE_15; -typedef union PA_CL_VPORT_YOFFSET regPA_CL_VPORT_YOFFSET; -typedef union PA_CL_VPORT_YOFFSET_1 regPA_CL_VPORT_YOFFSET_1; -typedef 
union PA_CL_VPORT_YOFFSET_2 regPA_CL_VPORT_YOFFSET_2; -typedef union PA_CL_VPORT_YOFFSET_3 regPA_CL_VPORT_YOFFSET_3; -typedef union PA_CL_VPORT_YOFFSET_4 regPA_CL_VPORT_YOFFSET_4; -typedef union PA_CL_VPORT_YOFFSET_5 regPA_CL_VPORT_YOFFSET_5; -typedef union PA_CL_VPORT_YOFFSET_6 regPA_CL_VPORT_YOFFSET_6; -typedef union PA_CL_VPORT_YOFFSET_7 regPA_CL_VPORT_YOFFSET_7; -typedef union PA_CL_VPORT_YOFFSET_8 regPA_CL_VPORT_YOFFSET_8; -typedef union PA_CL_VPORT_YOFFSET_9 regPA_CL_VPORT_YOFFSET_9; -typedef union PA_CL_VPORT_YOFFSET_10 regPA_CL_VPORT_YOFFSET_10; -typedef union PA_CL_VPORT_YOFFSET_11 regPA_CL_VPORT_YOFFSET_11; -typedef union PA_CL_VPORT_YOFFSET_12 regPA_CL_VPORT_YOFFSET_12; -typedef union PA_CL_VPORT_YOFFSET_13 regPA_CL_VPORT_YOFFSET_13; -typedef union PA_CL_VPORT_YOFFSET_14 regPA_CL_VPORT_YOFFSET_14; -typedef union PA_CL_VPORT_YOFFSET_15 regPA_CL_VPORT_YOFFSET_15; -typedef union PA_CL_VPORT_YSCALE regPA_CL_VPORT_YSCALE; -typedef union PA_CL_VPORT_YSCALE_1 regPA_CL_VPORT_YSCALE_1; -typedef union PA_CL_VPORT_YSCALE_2 regPA_CL_VPORT_YSCALE_2; -typedef union PA_CL_VPORT_YSCALE_3 regPA_CL_VPORT_YSCALE_3; -typedef union PA_CL_VPORT_YSCALE_4 regPA_CL_VPORT_YSCALE_4; -typedef union PA_CL_VPORT_YSCALE_5 regPA_CL_VPORT_YSCALE_5; -typedef union PA_CL_VPORT_YSCALE_6 regPA_CL_VPORT_YSCALE_6; -typedef union PA_CL_VPORT_YSCALE_7 regPA_CL_VPORT_YSCALE_7; -typedef union PA_CL_VPORT_YSCALE_8 regPA_CL_VPORT_YSCALE_8; -typedef union PA_CL_VPORT_YSCALE_9 regPA_CL_VPORT_YSCALE_9; -typedef union PA_CL_VPORT_YSCALE_10 regPA_CL_VPORT_YSCALE_10; -typedef union PA_CL_VPORT_YSCALE_11 regPA_CL_VPORT_YSCALE_11; -typedef union PA_CL_VPORT_YSCALE_12 regPA_CL_VPORT_YSCALE_12; -typedef union PA_CL_VPORT_YSCALE_13 regPA_CL_VPORT_YSCALE_13; -typedef union PA_CL_VPORT_YSCALE_14 regPA_CL_VPORT_YSCALE_14; -typedef union PA_CL_VPORT_YSCALE_15 regPA_CL_VPORT_YSCALE_15; -typedef union PA_CL_VPORT_ZOFFSET regPA_CL_VPORT_ZOFFSET; -typedef union PA_CL_VPORT_ZOFFSET_1 regPA_CL_VPORT_ZOFFSET_1; -typedef 
union PA_CL_VPORT_ZOFFSET_2 regPA_CL_VPORT_ZOFFSET_2; -typedef union PA_CL_VPORT_ZOFFSET_3 regPA_CL_VPORT_ZOFFSET_3; -typedef union PA_CL_VPORT_ZOFFSET_4 regPA_CL_VPORT_ZOFFSET_4; -typedef union PA_CL_VPORT_ZOFFSET_5 regPA_CL_VPORT_ZOFFSET_5; -typedef union PA_CL_VPORT_ZOFFSET_6 regPA_CL_VPORT_ZOFFSET_6; -typedef union PA_CL_VPORT_ZOFFSET_7 regPA_CL_VPORT_ZOFFSET_7; -typedef union PA_CL_VPORT_ZOFFSET_8 regPA_CL_VPORT_ZOFFSET_8; -typedef union PA_CL_VPORT_ZOFFSET_9 regPA_CL_VPORT_ZOFFSET_9; -typedef union PA_CL_VPORT_ZOFFSET_10 regPA_CL_VPORT_ZOFFSET_10; -typedef union PA_CL_VPORT_ZOFFSET_11 regPA_CL_VPORT_ZOFFSET_11; -typedef union PA_CL_VPORT_ZOFFSET_12 regPA_CL_VPORT_ZOFFSET_12; -typedef union PA_CL_VPORT_ZOFFSET_13 regPA_CL_VPORT_ZOFFSET_13; -typedef union PA_CL_VPORT_ZOFFSET_14 regPA_CL_VPORT_ZOFFSET_14; -typedef union PA_CL_VPORT_ZOFFSET_15 regPA_CL_VPORT_ZOFFSET_15; -typedef union PA_CL_VPORT_ZSCALE regPA_CL_VPORT_ZSCALE; -typedef union PA_CL_VPORT_ZSCALE_1 regPA_CL_VPORT_ZSCALE_1; -typedef union PA_CL_VPORT_ZSCALE_2 regPA_CL_VPORT_ZSCALE_2; -typedef union PA_CL_VPORT_ZSCALE_3 regPA_CL_VPORT_ZSCALE_3; -typedef union PA_CL_VPORT_ZSCALE_4 regPA_CL_VPORT_ZSCALE_4; -typedef union PA_CL_VPORT_ZSCALE_5 regPA_CL_VPORT_ZSCALE_5; -typedef union PA_CL_VPORT_ZSCALE_6 regPA_CL_VPORT_ZSCALE_6; -typedef union PA_CL_VPORT_ZSCALE_7 regPA_CL_VPORT_ZSCALE_7; -typedef union PA_CL_VPORT_ZSCALE_8 regPA_CL_VPORT_ZSCALE_8; -typedef union PA_CL_VPORT_ZSCALE_9 regPA_CL_VPORT_ZSCALE_9; -typedef union PA_CL_VPORT_ZSCALE_10 regPA_CL_VPORT_ZSCALE_10; -typedef union PA_CL_VPORT_ZSCALE_11 regPA_CL_VPORT_ZSCALE_11; -typedef union PA_CL_VPORT_ZSCALE_12 regPA_CL_VPORT_ZSCALE_12; -typedef union PA_CL_VPORT_ZSCALE_13 regPA_CL_VPORT_ZSCALE_13; -typedef union PA_CL_VPORT_ZSCALE_14 regPA_CL_VPORT_ZSCALE_14; -typedef union PA_CL_VPORT_ZSCALE_15 regPA_CL_VPORT_ZSCALE_15; -typedef union PA_CL_VRS_CNTL regPA_CL_VRS_CNTL; -typedef union PA_CL_VS_OUT_CNTL regPA_CL_VS_OUT_CNTL; -typedef union 
PA_CL_VTE_CNTL regPA_CL_VTE_CNTL; -typedef union PA_PH_ENHANCE regPA_PH_ENHANCE; -typedef union PA_PH_INTERFACE_FIFO_SIZE regPA_PH_INTERFACE_FIFO_SIZE; -typedef union PA_PH_PERFCOUNTER0_HI regPA_PH_PERFCOUNTER0_HI; -typedef union PA_PH_PERFCOUNTER0_LO regPA_PH_PERFCOUNTER0_LO; -typedef union PA_PH_PERFCOUNTER0_SELECT regPA_PH_PERFCOUNTER0_SELECT; -typedef union PA_PH_PERFCOUNTER0_SELECT1 regPA_PH_PERFCOUNTER0_SELECT1; -typedef union PA_PH_PERFCOUNTER1_HI regPA_PH_PERFCOUNTER1_HI; -typedef union PA_PH_PERFCOUNTER1_LO regPA_PH_PERFCOUNTER1_LO; -typedef union PA_PH_PERFCOUNTER1_SELECT regPA_PH_PERFCOUNTER1_SELECT; -typedef union PA_PH_PERFCOUNTER1_SELECT1 regPA_PH_PERFCOUNTER1_SELECT1; -typedef union PA_PH_PERFCOUNTER2_HI regPA_PH_PERFCOUNTER2_HI; -typedef union PA_PH_PERFCOUNTER2_LO regPA_PH_PERFCOUNTER2_LO; -typedef union PA_PH_PERFCOUNTER2_SELECT regPA_PH_PERFCOUNTER2_SELECT; -typedef union PA_PH_PERFCOUNTER2_SELECT1 regPA_PH_PERFCOUNTER2_SELECT1; -typedef union PA_PH_PERFCOUNTER3_HI regPA_PH_PERFCOUNTER3_HI; -typedef union PA_PH_PERFCOUNTER3_LO regPA_PH_PERFCOUNTER3_LO; -typedef union PA_PH_PERFCOUNTER3_SELECT regPA_PH_PERFCOUNTER3_SELECT; -typedef union PA_PH_PERFCOUNTER3_SELECT1 regPA_PH_PERFCOUNTER3_SELECT1; -typedef union PA_PH_PERFCOUNTER4_HI regPA_PH_PERFCOUNTER4_HI; -typedef union PA_PH_PERFCOUNTER4_LO regPA_PH_PERFCOUNTER4_LO; -typedef union PA_PH_PERFCOUNTER4_SELECT regPA_PH_PERFCOUNTER4_SELECT; -typedef union PA_PH_PERFCOUNTER5_HI regPA_PH_PERFCOUNTER5_HI; -typedef union PA_PH_PERFCOUNTER5_LO regPA_PH_PERFCOUNTER5_LO; -typedef union PA_PH_PERFCOUNTER5_SELECT regPA_PH_PERFCOUNTER5_SELECT; -typedef union PA_PH_PERFCOUNTER6_HI regPA_PH_PERFCOUNTER6_HI; -typedef union PA_PH_PERFCOUNTER6_LO regPA_PH_PERFCOUNTER6_LO; -typedef union PA_PH_PERFCOUNTER6_SELECT regPA_PH_PERFCOUNTER6_SELECT; -typedef union PA_PH_PERFCOUNTER7_HI regPA_PH_PERFCOUNTER7_HI; -typedef union PA_PH_PERFCOUNTER7_LO regPA_PH_PERFCOUNTER7_LO; -typedef union PA_PH_PERFCOUNTER7_SELECT 
regPA_PH_PERFCOUNTER7_SELECT; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union PA_RATE_CNTL regPA_RATE_CNTL; -#endif -typedef union PA_SC_AA_CONFIG regPA_SC_AA_CONFIG; -typedef union PA_SC_AA_MASK_X0Y0_X1Y0 regPA_SC_AA_MASK_X0Y0_X1Y0; -typedef union PA_SC_AA_MASK_X0Y1_X1Y1 regPA_SC_AA_MASK_X0Y1_X1Y1; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2; -typedef union PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 regPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union PA_SC_ATM_CNTL regPA_SC_ATM_CNTL; -#endif -typedef union PA_SC_BC_WAVE_BREAK regPA_SC_BC_WAVE_BREAK; -typedef union PA_SC_BINNER_CNTL_0 regPA_SC_BINNER_CNTL_0; -typedef 
union PA_SC_BINNER_CNTL_1 regPA_SC_BINNER_CNTL_1; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union PA_SC_BINNER_CNTL_2 regPA_SC_BINNER_CNTL_2; -#endif -typedef union PA_SC_BINNER_CNTL_OVERRIDE regPA_SC_BINNER_CNTL_OVERRIDE; -typedef union PA_SC_BINNER_EVENT_CNTL_0 regPA_SC_BINNER_EVENT_CNTL_0; -typedef union PA_SC_BINNER_EVENT_CNTL_1 regPA_SC_BINNER_EVENT_CNTL_1; -typedef union PA_SC_BINNER_EVENT_CNTL_2 regPA_SC_BINNER_EVENT_CNTL_2; -typedef union PA_SC_BINNER_EVENT_CNTL_3 regPA_SC_BINNER_EVENT_CNTL_3; -typedef union PA_SC_BINNER_PERF_CNTL_0 regPA_SC_BINNER_PERF_CNTL_0; -typedef union PA_SC_BINNER_PERF_CNTL_1 regPA_SC_BINNER_PERF_CNTL_1; -typedef union PA_SC_BINNER_PERF_CNTL_2 regPA_SC_BINNER_PERF_CNTL_2; -typedef union PA_SC_BINNER_PERF_CNTL_3 regPA_SC_BINNER_PERF_CNTL_3; -typedef union PA_SC_BINNER_TIMEOUT_COUNTER regPA_SC_BINNER_TIMEOUT_COUNTER; -typedef union PA_SC_CENTROID_PRIORITY_0 regPA_SC_CENTROID_PRIORITY_0; -typedef union PA_SC_CENTROID_PRIORITY_1 regPA_SC_CENTROID_PRIORITY_1; -typedef union PA_SC_CLIPRECT_0_BR regPA_SC_CLIPRECT_0_BR; -typedef union PA_SC_CLIPRECT_0_TL regPA_SC_CLIPRECT_0_TL; -typedef union PA_SC_CLIPRECT_1_BR regPA_SC_CLIPRECT_1_BR; -typedef union PA_SC_CLIPRECT_1_TL regPA_SC_CLIPRECT_1_TL; -typedef union PA_SC_CLIPRECT_2_BR regPA_SC_CLIPRECT_2_BR; -typedef union PA_SC_CLIPRECT_2_TL regPA_SC_CLIPRECT_2_TL; -typedef union PA_SC_CLIPRECT_3_BR regPA_SC_CLIPRECT_3_BR; -typedef union PA_SC_CLIPRECT_3_TL regPA_SC_CLIPRECT_3_TL; -typedef union PA_SC_CLIPRECT_RULE regPA_SC_CLIPRECT_RULE; -typedef union PA_SC_CONSERVATIVE_RASTERIZATION_CNTL regPA_SC_CONSERVATIVE_RASTERIZATION_CNTL; -typedef union PA_SC_DSM_CNTL regPA_SC_DSM_CNTL; -typedef union PA_SC_EDGERULE regPA_SC_EDGERULE; -typedef union PA_SC_ENHANCE regPA_SC_ENHANCE; -typedef union PA_SC_ENHANCE_1 regPA_SC_ENHANCE_1; -typedef union PA_SC_ENHANCE_2 regPA_SC_ENHANCE_2; -typedef union PA_SC_ENHANCE_3 regPA_SC_ENHANCE_3; -typedef union 
PA_SC_ENHANCE_INTERNAL regPA_SC_ENHANCE_INTERNAL; -typedef union PA_SC_FIFO_DEPTH_CNTL regPA_SC_FIFO_DEPTH_CNTL; -typedef union PA_SC_FIFO_SIZE regPA_SC_FIFO_SIZE; -typedef union PA_SC_FORCE_EOV_MAX_CNTS regPA_SC_FORCE_EOV_MAX_CNTS; -typedef union PA_SC_GENERIC_SCISSOR_BR regPA_SC_GENERIC_SCISSOR_BR; -typedef union PA_SC_GENERIC_SCISSOR_TL regPA_SC_GENERIC_SCISSOR_TL; -typedef union PA_SC_HP3D_TRAP_SCREEN_COUNT regPA_SC_HP3D_TRAP_SCREEN_COUNT; -typedef union PA_SC_HP3D_TRAP_SCREEN_H regPA_SC_HP3D_TRAP_SCREEN_H; -typedef union PA_SC_HP3D_TRAP_SCREEN_HV_EN regPA_SC_HP3D_TRAP_SCREEN_HV_EN; -typedef union PA_SC_HP3D_TRAP_SCREEN_HV_LOCK regPA_SC_HP3D_TRAP_SCREEN_HV_LOCK; -typedef union PA_SC_HP3D_TRAP_SCREEN_OCCURRENCE regPA_SC_HP3D_TRAP_SCREEN_OCCURRENCE; -typedef union PA_SC_HP3D_TRAP_SCREEN_V regPA_SC_HP3D_TRAP_SCREEN_V; -typedef union PA_SC_IF_FIFO_SIZE regPA_SC_IF_FIFO_SIZE; -typedef union PA_SC_LINE_CNTL regPA_SC_LINE_CNTL; -typedef union PA_SC_LINE_STIPPLE regPA_SC_LINE_STIPPLE; -typedef union PA_SC_LINE_STIPPLE_STATE regPA_SC_LINE_STIPPLE_STATE; -typedef union PA_SC_MODE_CNTL_0 regPA_SC_MODE_CNTL_0; -typedef union PA_SC_MODE_CNTL_1 regPA_SC_MODE_CNTL_1; -typedef union PA_SC_NGG_MODE_CNTL regPA_SC_NGG_MODE_CNTL; -typedef union PA_SC_P3D_TRAP_SCREEN_COUNT regPA_SC_P3D_TRAP_SCREEN_COUNT; -typedef union PA_SC_P3D_TRAP_SCREEN_H regPA_SC_P3D_TRAP_SCREEN_H; -typedef union PA_SC_P3D_TRAP_SCREEN_HV_EN regPA_SC_P3D_TRAP_SCREEN_HV_EN; -typedef union PA_SC_P3D_TRAP_SCREEN_HV_LOCK regPA_SC_P3D_TRAP_SCREEN_HV_LOCK; -typedef union PA_SC_P3D_TRAP_SCREEN_OCCURRENCE regPA_SC_P3D_TRAP_SCREEN_OCCURRENCE; -typedef union PA_SC_P3D_TRAP_SCREEN_V regPA_SC_P3D_TRAP_SCREEN_V; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union PA_SC_PACKER_WAVE_ID_CNTL regPA_SC_PACKER_WAVE_ID_CNTL; -#endif -typedef union PA_SC_PBB_OVERRIDE_FLAG regPA_SC_PBB_OVERRIDE_FLAG; -typedef union PA_SC_PERFCOUNTER0_HI regPA_SC_PERFCOUNTER0_HI; -typedef union 
PA_SC_PERFCOUNTER0_LO regPA_SC_PERFCOUNTER0_LO; -typedef union PA_SC_PERFCOUNTER0_SELECT regPA_SC_PERFCOUNTER0_SELECT; -typedef union PA_SC_PERFCOUNTER0_SELECT1 regPA_SC_PERFCOUNTER0_SELECT1; -typedef union PA_SC_PERFCOUNTER1_HI regPA_SC_PERFCOUNTER1_HI; -typedef union PA_SC_PERFCOUNTER1_LO regPA_SC_PERFCOUNTER1_LO; -typedef union PA_SC_PERFCOUNTER1_SELECT regPA_SC_PERFCOUNTER1_SELECT; -typedef union PA_SC_PERFCOUNTER2_HI regPA_SC_PERFCOUNTER2_HI; -typedef union PA_SC_PERFCOUNTER2_LO regPA_SC_PERFCOUNTER2_LO; -typedef union PA_SC_PERFCOUNTER2_SELECT regPA_SC_PERFCOUNTER2_SELECT; -typedef union PA_SC_PERFCOUNTER3_HI regPA_SC_PERFCOUNTER3_HI; -typedef union PA_SC_PERFCOUNTER3_LO regPA_SC_PERFCOUNTER3_LO; -typedef union PA_SC_PERFCOUNTER3_SELECT regPA_SC_PERFCOUNTER3_SELECT; -typedef union PA_SC_PERFCOUNTER4_HI regPA_SC_PERFCOUNTER4_HI; -typedef union PA_SC_PERFCOUNTER4_LO regPA_SC_PERFCOUNTER4_LO; -typedef union PA_SC_PERFCOUNTER4_SELECT regPA_SC_PERFCOUNTER4_SELECT; -typedef union PA_SC_PERFCOUNTER5_HI regPA_SC_PERFCOUNTER5_HI; -typedef union PA_SC_PERFCOUNTER5_LO regPA_SC_PERFCOUNTER5_LO; -typedef union PA_SC_PERFCOUNTER5_SELECT regPA_SC_PERFCOUNTER5_SELECT; -typedef union PA_SC_PERFCOUNTER6_HI regPA_SC_PERFCOUNTER6_HI; -typedef union PA_SC_PERFCOUNTER6_LO regPA_SC_PERFCOUNTER6_LO; -typedef union PA_SC_PERFCOUNTER6_SELECT regPA_SC_PERFCOUNTER6_SELECT; -typedef union PA_SC_PERFCOUNTER7_HI regPA_SC_PERFCOUNTER7_HI; -typedef union PA_SC_PERFCOUNTER7_LO regPA_SC_PERFCOUNTER7_LO; -typedef union PA_SC_PERFCOUNTER7_SELECT regPA_SC_PERFCOUNTER7_SELECT; -typedef union PA_SC_PKR_WAVE_TABLE_CNTL regPA_SC_PKR_WAVE_TABLE_CNTL; -typedef union PA_SC_RASTER_CONFIG regPA_SC_RASTER_CONFIG; -typedef union PA_SC_RASTER_CONFIG_1 regPA_SC_RASTER_CONFIG_1; -typedef union PA_SC_SCREEN_SCISSOR_BR regPA_SC_SCREEN_SCISSOR_BR; -typedef union PA_SC_SCREEN_SCISSOR_TL regPA_SC_SCREEN_SCISSOR_TL; -typedef union PA_SC_SHADER_CONTROL regPA_SC_SHADER_CONTROL; -typedef union 
PA_SC_TILE_STEERING_CREST_OVERRIDE regPA_SC_TILE_STEERING_CREST_OVERRIDE; -typedef union PA_SC_TILE_STEERING_OVERRIDE regPA_SC_TILE_STEERING_OVERRIDE; -typedef union PA_SC_TRAP_SCREEN_COUNT regPA_SC_TRAP_SCREEN_COUNT; -typedef union PA_SC_TRAP_SCREEN_H regPA_SC_TRAP_SCREEN_H; -typedef union PA_SC_TRAP_SCREEN_HV_EN regPA_SC_TRAP_SCREEN_HV_EN; -typedef union PA_SC_TRAP_SCREEN_HV_LOCK regPA_SC_TRAP_SCREEN_HV_LOCK; -typedef union PA_SC_TRAP_SCREEN_OCCURRENCE regPA_SC_TRAP_SCREEN_OCCURRENCE; -typedef union PA_SC_TRAP_SCREEN_V regPA_SC_TRAP_SCREEN_V; -typedef union PA_SC_VPORT_SCISSOR_0_BR regPA_SC_VPORT_SCISSOR_0_BR; -typedef union PA_SC_VPORT_SCISSOR_0_TL regPA_SC_VPORT_SCISSOR_0_TL; -typedef union PA_SC_VPORT_SCISSOR_1_BR regPA_SC_VPORT_SCISSOR_1_BR; -typedef union PA_SC_VPORT_SCISSOR_1_TL regPA_SC_VPORT_SCISSOR_1_TL; -typedef union PA_SC_VPORT_SCISSOR_2_BR regPA_SC_VPORT_SCISSOR_2_BR; -typedef union PA_SC_VPORT_SCISSOR_2_TL regPA_SC_VPORT_SCISSOR_2_TL; -typedef union PA_SC_VPORT_SCISSOR_3_BR regPA_SC_VPORT_SCISSOR_3_BR; -typedef union PA_SC_VPORT_SCISSOR_3_TL regPA_SC_VPORT_SCISSOR_3_TL; -typedef union PA_SC_VPORT_SCISSOR_4_BR regPA_SC_VPORT_SCISSOR_4_BR; -typedef union PA_SC_VPORT_SCISSOR_4_TL regPA_SC_VPORT_SCISSOR_4_TL; -typedef union PA_SC_VPORT_SCISSOR_5_BR regPA_SC_VPORT_SCISSOR_5_BR; -typedef union PA_SC_VPORT_SCISSOR_5_TL regPA_SC_VPORT_SCISSOR_5_TL; -typedef union PA_SC_VPORT_SCISSOR_6_BR regPA_SC_VPORT_SCISSOR_6_BR; -typedef union PA_SC_VPORT_SCISSOR_6_TL regPA_SC_VPORT_SCISSOR_6_TL; -typedef union PA_SC_VPORT_SCISSOR_7_BR regPA_SC_VPORT_SCISSOR_7_BR; -typedef union PA_SC_VPORT_SCISSOR_7_TL regPA_SC_VPORT_SCISSOR_7_TL; -typedef union PA_SC_VPORT_SCISSOR_8_BR regPA_SC_VPORT_SCISSOR_8_BR; -typedef union PA_SC_VPORT_SCISSOR_8_TL regPA_SC_VPORT_SCISSOR_8_TL; -typedef union PA_SC_VPORT_SCISSOR_9_BR regPA_SC_VPORT_SCISSOR_9_BR; -typedef union PA_SC_VPORT_SCISSOR_9_TL regPA_SC_VPORT_SCISSOR_9_TL; -typedef union PA_SC_VPORT_SCISSOR_10_BR 
regPA_SC_VPORT_SCISSOR_10_BR; -typedef union PA_SC_VPORT_SCISSOR_10_TL regPA_SC_VPORT_SCISSOR_10_TL; -typedef union PA_SC_VPORT_SCISSOR_11_BR regPA_SC_VPORT_SCISSOR_11_BR; -typedef union PA_SC_VPORT_SCISSOR_11_TL regPA_SC_VPORT_SCISSOR_11_TL; -typedef union PA_SC_VPORT_SCISSOR_12_BR regPA_SC_VPORT_SCISSOR_12_BR; -typedef union PA_SC_VPORT_SCISSOR_12_TL regPA_SC_VPORT_SCISSOR_12_TL; -typedef union PA_SC_VPORT_SCISSOR_13_BR regPA_SC_VPORT_SCISSOR_13_BR; -typedef union PA_SC_VPORT_SCISSOR_13_TL regPA_SC_VPORT_SCISSOR_13_TL; -typedef union PA_SC_VPORT_SCISSOR_14_BR regPA_SC_VPORT_SCISSOR_14_BR; -typedef union PA_SC_VPORT_SCISSOR_14_TL regPA_SC_VPORT_SCISSOR_14_TL; -typedef union PA_SC_VPORT_SCISSOR_15_BR regPA_SC_VPORT_SCISSOR_15_BR; -typedef union PA_SC_VPORT_SCISSOR_15_TL regPA_SC_VPORT_SCISSOR_15_TL; -typedef union PA_SC_VPORT_ZMAX_0 regPA_SC_VPORT_ZMAX_0; -typedef union PA_SC_VPORT_ZMAX_1 regPA_SC_VPORT_ZMAX_1; -typedef union PA_SC_VPORT_ZMAX_2 regPA_SC_VPORT_ZMAX_2; -typedef union PA_SC_VPORT_ZMAX_3 regPA_SC_VPORT_ZMAX_3; -typedef union PA_SC_VPORT_ZMAX_4 regPA_SC_VPORT_ZMAX_4; -typedef union PA_SC_VPORT_ZMAX_5 regPA_SC_VPORT_ZMAX_5; -typedef union PA_SC_VPORT_ZMAX_6 regPA_SC_VPORT_ZMAX_6; -typedef union PA_SC_VPORT_ZMAX_7 regPA_SC_VPORT_ZMAX_7; -typedef union PA_SC_VPORT_ZMAX_8 regPA_SC_VPORT_ZMAX_8; -typedef union PA_SC_VPORT_ZMAX_9 regPA_SC_VPORT_ZMAX_9; -typedef union PA_SC_VPORT_ZMAX_10 regPA_SC_VPORT_ZMAX_10; -typedef union PA_SC_VPORT_ZMAX_11 regPA_SC_VPORT_ZMAX_11; -typedef union PA_SC_VPORT_ZMAX_12 regPA_SC_VPORT_ZMAX_12; -typedef union PA_SC_VPORT_ZMAX_13 regPA_SC_VPORT_ZMAX_13; -typedef union PA_SC_VPORT_ZMAX_14 regPA_SC_VPORT_ZMAX_14; -typedef union PA_SC_VPORT_ZMAX_15 regPA_SC_VPORT_ZMAX_15; -typedef union PA_SC_VPORT_ZMIN_0 regPA_SC_VPORT_ZMIN_0; -typedef union PA_SC_VPORT_ZMIN_1 regPA_SC_VPORT_ZMIN_1; -typedef union PA_SC_VPORT_ZMIN_2 regPA_SC_VPORT_ZMIN_2; -typedef union PA_SC_VPORT_ZMIN_3 regPA_SC_VPORT_ZMIN_3; -typedef union PA_SC_VPORT_ZMIN_4 
regPA_SC_VPORT_ZMIN_4; -typedef union PA_SC_VPORT_ZMIN_5 regPA_SC_VPORT_ZMIN_5; -typedef union PA_SC_VPORT_ZMIN_6 regPA_SC_VPORT_ZMIN_6; -typedef union PA_SC_VPORT_ZMIN_7 regPA_SC_VPORT_ZMIN_7; -typedef union PA_SC_VPORT_ZMIN_8 regPA_SC_VPORT_ZMIN_8; -typedef union PA_SC_VPORT_ZMIN_9 regPA_SC_VPORT_ZMIN_9; -typedef union PA_SC_VPORT_ZMIN_10 regPA_SC_VPORT_ZMIN_10; -typedef union PA_SC_VPORT_ZMIN_11 regPA_SC_VPORT_ZMIN_11; -typedef union PA_SC_VPORT_ZMIN_12 regPA_SC_VPORT_ZMIN_12; -typedef union PA_SC_VPORT_ZMIN_13 regPA_SC_VPORT_ZMIN_13; -typedef union PA_SC_VPORT_ZMIN_14 regPA_SC_VPORT_ZMIN_14; -typedef union PA_SC_VPORT_ZMIN_15 regPA_SC_VPORT_ZMIN_15; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union PA_SC_VRS_OVERRIDE_CNTL regPA_SC_VRS_OVERRIDE_CNTL; -typedef union PA_SC_VRS_RATE_BASE regPA_SC_VRS_RATE_BASE; -typedef union PA_SC_VRS_RATE_BASE_EXT regPA_SC_VRS_RATE_BASE_EXT; -typedef union PA_SC_VRS_RATE_CACHE_CNTL regPA_SC_VRS_RATE_CACHE_CNTL; -typedef union PA_SC_VRS_RATE_FEEDBACK_BASE regPA_SC_VRS_RATE_FEEDBACK_BASE; -typedef union PA_SC_VRS_RATE_FEEDBACK_BASE_EXT regPA_SC_VRS_RATE_FEEDBACK_BASE_EXT; -typedef union PA_SC_VRS_RATE_FEEDBACK_SIZE_XY regPA_SC_VRS_RATE_FEEDBACK_SIZE_XY; -typedef union PA_SC_VRS_RATE_SIZE_XY regPA_SC_VRS_RATE_SIZE_XY; -typedef union PA_SC_VRS_SURFACE_CNTL regPA_SC_VRS_SURFACE_CNTL; -typedef union PA_SC_VRS_SURFACE_CNTL_1 regPA_SC_VRS_SURFACE_CNTL_1; -#endif -typedef union PA_SC_WINDOW_OFFSET regPA_SC_WINDOW_OFFSET; -typedef union PA_SC_WINDOW_SCISSOR_BR regPA_SC_WINDOW_SCISSOR_BR; -typedef union PA_SC_WINDOW_SCISSOR_TL regPA_SC_WINDOW_SCISSOR_TL; -typedef union PA_SIDEBAND_REQUEST_DELAYS regPA_SIDEBAND_REQUEST_DELAYS; -typedef union PA_STATE_STEREO_X regPA_STATE_STEREO_X; -typedef union PA_STEREO_CNTL regPA_STEREO_CNTL; -typedef union PA_SU_CNTL_STATUS regPA_SU_CNTL_STATUS; -typedef union PA_SU_HARDWARE_SCREEN_OFFSET regPA_SU_HARDWARE_SCREEN_OFFSET; -typedef union PA_SU_LINE_CNTL 
regPA_SU_LINE_CNTL; -typedef union PA_SU_LINE_STIPPLE_CNTL regPA_SU_LINE_STIPPLE_CNTL; -typedef union PA_SU_LINE_STIPPLE_SCALE regPA_SU_LINE_STIPPLE_SCALE; -typedef union PA_SU_LINE_STIPPLE_VALUE regPA_SU_LINE_STIPPLE_VALUE; -typedef union PA_SU_OVER_RASTERIZATION_CNTL regPA_SU_OVER_RASTERIZATION_CNTL; -typedef union PA_SU_PERFCOUNTER0_HI regPA_SU_PERFCOUNTER0_HI; -typedef union PA_SU_PERFCOUNTER0_LO regPA_SU_PERFCOUNTER0_LO; -typedef union PA_SU_PERFCOUNTER0_SELECT regPA_SU_PERFCOUNTER0_SELECT; -typedef union PA_SU_PERFCOUNTER0_SELECT1 regPA_SU_PERFCOUNTER0_SELECT1; -typedef union PA_SU_PERFCOUNTER1_HI regPA_SU_PERFCOUNTER1_HI; -typedef union PA_SU_PERFCOUNTER1_LO regPA_SU_PERFCOUNTER1_LO; -typedef union PA_SU_PERFCOUNTER1_SELECT regPA_SU_PERFCOUNTER1_SELECT; -typedef union PA_SU_PERFCOUNTER1_SELECT1 regPA_SU_PERFCOUNTER1_SELECT1; -typedef union PA_SU_PERFCOUNTER2_HI regPA_SU_PERFCOUNTER2_HI; -typedef union PA_SU_PERFCOUNTER2_LO regPA_SU_PERFCOUNTER2_LO; -typedef union PA_SU_PERFCOUNTER2_SELECT regPA_SU_PERFCOUNTER2_SELECT; -typedef union PA_SU_PERFCOUNTER2_SELECT1 regPA_SU_PERFCOUNTER2_SELECT1; -typedef union PA_SU_PERFCOUNTER3_HI regPA_SU_PERFCOUNTER3_HI; -typedef union PA_SU_PERFCOUNTER3_LO regPA_SU_PERFCOUNTER3_LO; -typedef union PA_SU_PERFCOUNTER3_SELECT regPA_SU_PERFCOUNTER3_SELECT; -typedef union PA_SU_PERFCOUNTER3_SELECT1 regPA_SU_PERFCOUNTER3_SELECT1; -typedef union PA_SU_POINT_MINMAX regPA_SU_POINT_MINMAX; -typedef union PA_SU_POINT_SIZE regPA_SU_POINT_SIZE; -typedef union PA_SU_POLY_OFFSET_BACK_OFFSET regPA_SU_POLY_OFFSET_BACK_OFFSET; -typedef union PA_SU_POLY_OFFSET_BACK_SCALE regPA_SU_POLY_OFFSET_BACK_SCALE; -typedef union PA_SU_POLY_OFFSET_CLAMP regPA_SU_POLY_OFFSET_CLAMP; -typedef union PA_SU_POLY_OFFSET_DB_FMT_CNTL regPA_SU_POLY_OFFSET_DB_FMT_CNTL; -typedef union PA_SU_POLY_OFFSET_FRONT_OFFSET regPA_SU_POLY_OFFSET_FRONT_OFFSET; -typedef union PA_SU_POLY_OFFSET_FRONT_SCALE regPA_SU_POLY_OFFSET_FRONT_SCALE; -typedef union PA_SU_PRIM_FILTER_CNTL 
regPA_SU_PRIM_FILTER_CNTL; -typedef union PA_SU_SC_MODE_CNTL regPA_SU_SC_MODE_CNTL; -typedef union PA_SU_SMALL_PRIM_FILTER_CNTL regPA_SU_SMALL_PRIM_FILTER_CNTL; -typedef union PA_SU_VTX_CNTL regPA_SU_VTX_CNTL; -typedef union PA_UTCL1_CNTL1 regPA_UTCL1_CNTL1; -typedef union PA_UTCL1_CNTL2 regPA_UTCL1_CNTL2; -typedef union PerfMonCtl1 regPerfMonCtl1; -typedef union PerfMonCtl2 regPerfMonCtl2; -typedef union PerfMonCtl3 regPerfMonCtl3; -typedef union PerfMonCtl4 regPerfMonCtl4; -typedef union PerfMonCtl5 regPerfMonCtl5; -#if CHIP_HDR_NAVI31 -typedef union PerfMonCtl6 regPerfMonCtl6; -typedef union PerfMonCtl7 regPerfMonCtl7; -typedef union PerfMonCtl8 regPerfMonCtl8; -typedef union PerfMonCtl9 regPerfMonCtl9; -typedef union PerfMonCtl10 regPerfMonCtl10; -typedef union PerfMonCtl11 regPerfMonCtl11; -typedef union PerfMonCtl12 regPerfMonCtl12; -#endif -typedef union PerfMonCtlClk regPerfMonCtlClk; -typedef union PerfMonCtr1_Hi regPerfMonCtr1_Hi; -typedef union PerfMonCtr1_Lo regPerfMonCtr1_Lo; -typedef union PerfMonCtr2_Hi regPerfMonCtr2_Hi; -typedef union PerfMonCtr2_Lo regPerfMonCtr2_Lo; -typedef union PerfMonCtr3_Hi regPerfMonCtr3_Hi; -typedef union PerfMonCtr3_Lo regPerfMonCtr3_Lo; -typedef union PerfMonCtr4_Hi regPerfMonCtr4_Hi; -typedef union PerfMonCtr4_Lo regPerfMonCtr4_Lo; -typedef union PerfMonCtr5_Hi regPerfMonCtr5_Hi; -typedef union PerfMonCtr5_Lo regPerfMonCtr5_Lo; -#if CHIP_HDR_NAVI31 -typedef union PerfMonCtr6_Hi regPerfMonCtr6_Hi; -typedef union PerfMonCtr6_Lo regPerfMonCtr6_Lo; -typedef union PerfMonCtr7_Hi regPerfMonCtr7_Hi; -typedef union PerfMonCtr7_Lo regPerfMonCtr7_Lo; -typedef union PerfMonCtr8_Hi regPerfMonCtr8_Hi; -typedef union PerfMonCtr8_Lo regPerfMonCtr8_Lo; -typedef union PerfMonCtr9_Hi regPerfMonCtr9_Hi; -typedef union PerfMonCtr9_Lo regPerfMonCtr9_Lo; -typedef union PerfMonCtr10_Hi regPerfMonCtr10_Hi; -typedef union PerfMonCtr10_Lo regPerfMonCtr10_Lo; -typedef union PerfMonCtr11_Hi regPerfMonCtr11_Hi; -typedef union PerfMonCtr11_Lo 
regPerfMonCtr11_Lo; -typedef union PerfMonCtr12_Hi regPerfMonCtr12_Hi; -typedef union PerfMonCtr12_Lo regPerfMonCtr12_Lo; -#endif -typedef union PerfMonCtrClk_Hi regPerfMonCtrClk_Hi; -typedef union PerfMonCtrClk_Lo regPerfMonCtrClk_Lo; -typedef union RLC_CGTT_MGCG_OVERRIDE regRLC_CGTT_MGCG_OVERRIDE; -typedef union RLC_PERFCOUNTER0_HI regRLC_PERFCOUNTER0_HI; -typedef union RLC_PERFCOUNTER0_LO regRLC_PERFCOUNTER0_LO; -typedef union RLC_PERFCOUNTER0_SELECT regRLC_PERFCOUNTER0_SELECT; -typedef union RLC_PERFCOUNTER1_HI regRLC_PERFCOUNTER1_HI; -typedef union RLC_PERFCOUNTER1_LO regRLC_PERFCOUNTER1_LO; -typedef union RLC_PERFCOUNTER1_SELECT regRLC_PERFCOUNTER1_SELECT; -typedef union RLC_PERFMON_CLK_CNTL regRLC_PERFMON_CLK_CNTL; -typedef union RLC_PERFMON_CNTL regRLC_PERFMON_CNTL; -typedef union RLC_SPM_ACCUM_CTRL regRLC_SPM_ACCUM_CTRL; -typedef union RLC_SPM_ACCUM_CTRLRAM_ADDR regRLC_SPM_ACCUM_CTRLRAM_ADDR; -typedef union RLC_SPM_ACCUM_CTRLRAM_ADDR_OFFSET regRLC_SPM_ACCUM_CTRLRAM_ADDR_OFFSET; -typedef union RLC_SPM_ACCUM_CTRLRAM_DATA regRLC_SPM_ACCUM_CTRLRAM_DATA; -typedef union RLC_SPM_ACCUM_DATARAM_32BITCNTRS_REGIONS regRLC_SPM_ACCUM_DATARAM_32BITCNTRS_REGIONS; -typedef union RLC_SPM_ACCUM_DATARAM_ADDR regRLC_SPM_ACCUM_DATARAM_ADDR; -typedef union RLC_SPM_ACCUM_DATARAM_DATA regRLC_SPM_ACCUM_DATARAM_DATA; -typedef union RLC_SPM_ACCUM_DATARAM_WRCOUNT regRLC_SPM_ACCUM_DATARAM_WRCOUNT; -typedef union RLC_SPM_ACCUM_MODE regRLC_SPM_ACCUM_MODE; -typedef union RLC_SPM_ACCUM_SAMPLES_REQUESTED regRLC_SPM_ACCUM_SAMPLES_REQUESTED; -typedef union RLC_SPM_ACCUM_STATUS regRLC_SPM_ACCUM_STATUS; -typedef union RLC_SPM_ACCUM_SWA_DATARAM_ADDR regRLC_SPM_ACCUM_SWA_DATARAM_ADDR; -typedef union RLC_SPM_ACCUM_SWA_DATARAM_DATA regRLC_SPM_ACCUM_SWA_DATARAM_DATA; -typedef union RLC_SPM_ACCUM_THRESHOLD regRLC_SPM_ACCUM_THRESHOLD; -typedef union RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY regRLC_SPM_CBR0_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY 
regRLC_SPM_CBR1_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_CB_PERFMON_SAMPLE_DELAY regRLC_SPM_CB_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_CPC_PERFMON_SAMPLE_DELAY regRLC_SPM_CPC_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_CPF_PERFMON_SAMPLE_DELAY regRLC_SPM_CPF_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_CPG_PERFMON_SAMPLE_DELAY regRLC_SPM_CPG_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY regRLC_SPM_DBR0_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY regRLC_SPM_DBR1_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_DB_PERFMON_SAMPLE_DELAY regRLC_SPM_DB_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_DESER_START_SKEW regRLC_SPM_DESER_START_SKEW; -typedef union RLC_SPM_GDS_PERFMON_SAMPLE_DELAY regRLC_SPM_GDS_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_GFXCLOCK_HIGHCOUNT regRLC_SPM_GFXCLOCK_HIGHCOUNT; -typedef union RLC_SPM_GFXCLOCK_LOWCOUNT regRLC_SPM_GFXCLOCK_LOWCOUNT; -typedef union RLC_SPM_GLB_SAMPLEDELAY_IND_ADDR regRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR; -typedef union RLC_SPM_GLB_SAMPLEDELAY_IND_DATA regRLC_SPM_GLB_SAMPLEDELAY_IND_DATA; -typedef union RLC_SPM_GLOBALS_MUXSEL_SKEW regRLC_SPM_GLOBALS_MUXSEL_SKEW; -typedef union RLC_SPM_GLOBALS_SAMPLE_SKEW regRLC_SPM_GLOBALS_SAMPLE_SKEW; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_GLOBAL_DELAY_IND_ADDR regRLC_SPM_GLOBAL_DELAY_IND_ADDR; -typedef union RLC_SPM_GLOBAL_DELAY_IND_DATA regRLC_SPM_GLOBAL_DELAY_IND_DATA; -#endif -typedef union RLC_SPM_GLOBAL_MUXSEL_ADDR regRLC_SPM_GLOBAL_MUXSEL_ADDR; -typedef union RLC_SPM_GLOBAL_MUXSEL_ADDR_OFFSET regRLC_SPM_GLOBAL_MUXSEL_ADDR_OFFSET; -typedef union RLC_SPM_GLOBAL_MUXSEL_DATA regRLC_SPM_GLOBAL_MUXSEL_DATA; -typedef union RLC_SPM_IA_PERFMON_SAMPLE_DELAY regRLC_SPM_IA_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_INT_CNTL regRLC_SPM_INT_CNTL; -typedef union RLC_SPM_INT_INFO_1 regRLC_SPM_INT_INFO_1; -typedef union RLC_SPM_INT_INFO_2 regRLC_SPM_INT_INFO_2; -typedef union 
RLC_SPM_INT_STATUS regRLC_SPM_INT_STATUS; -typedef union RLC_SPM_MC_CNTL regRLC_SPM_MC_CNTL; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_MODE regRLC_SPM_MODE; -typedef union RLC_SPM_PAUSE regRLC_SPM_PAUSE; -#endif -typedef union RLC_SPM_PA_PERFMON_SAMPLE_DELAY regRLC_SPM_PA_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_PERFMON_CNTL regRLC_SPM_PERFMON_CNTL; -typedef union RLC_SPM_PERFMON_GLB_SEGMENT_SIZE regRLC_SPM_PERFMON_GLB_SEGMENT_SIZE; -typedef union RLC_SPM_PERFMON_RING_BASE_HI regRLC_SPM_PERFMON_RING_BASE_HI; -typedef union RLC_SPM_PERFMON_RING_BASE_LO regRLC_SPM_PERFMON_RING_BASE_LO; -typedef union RLC_SPM_PERFMON_RING_SIZE regRLC_SPM_PERFMON_RING_SIZE; -typedef union RLC_SPM_PERFMON_SAMPLE_DELAY_MAX regRLC_SPM_PERFMON_SAMPLE_DELAY_MAX; -typedef union RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE regRLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE; -typedef union RLC_SPM_PERFMON_SEGMENT_SIZE regRLC_SPM_PERFMON_SEGMENT_SIZE; -typedef union RLC_SPM_PERFMON_SWA_GLB_SEGMENT_SIZE regRLC_SPM_PERFMON_SWA_GLB_SEGMENT_SIZE; -typedef union RLC_SPM_PERFMON_SWA_SE3TO0_SEGMENT_SIZE regRLC_SPM_PERFMON_SWA_SE3TO0_SEGMENT_SIZE; -typedef union RLC_SPM_PERFMON_SWA_SEGMENT_SIZE regRLC_SPM_PERFMON_SWA_SEGMENT_SIZE; -typedef union RLC_SPM_RING_RDPTR regRLC_SPM_RING_RDPTR; -typedef union RLC_SPM_RING_WRPTR regRLC_SPM_RING_WRPTR; -typedef union RLC_SPM_RMI_PERFMON_SAMPLE_DELAY regRLC_SPM_RMI_PERFMON_SAMPLE_DELAY; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_RSPM_CMD regRLC_SPM_RSPM_CMD; -typedef union RLC_SPM_RSPM_CMD_ACK regRLC_SPM_RSPM_CMD_ACK; -typedef union RLC_SPM_RSPM_REQ_DATA_HI regRLC_SPM_RSPM_REQ_DATA_HI; -typedef union RLC_SPM_RSPM_REQ_DATA_LO regRLC_SPM_RSPM_REQ_DATA_LO; -typedef union RLC_SPM_RSPM_REQ_OP regRLC_SPM_RSPM_REQ_OP; -typedef union RLC_SPM_RSPM_RET_DATA regRLC_SPM_RSPM_RET_DATA; -typedef union RLC_SPM_RSPM_RET_OP regRLC_SPM_RSPM_RET_OP; -#endif -typedef union 
RLC_SPM_SAMPLE_CNT regRLC_SPM_SAMPLE_CNT; -typedef union RLC_SPM_SC_PERFMON_SAMPLE_DELAY regRLC_SPM_SC_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_SEGMENT_THRESHOLD regRLC_SPM_SEGMENT_THRESHOLD; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_SE_DELAY_IND_ADDR regRLC_SPM_SE_DELAY_IND_ADDR; -typedef union RLC_SPM_SE_DELAY_IND_DATA regRLC_SPM_SE_DELAY_IND_DATA; -#endif -typedef union RLC_SPM_SE_MUXSEL_ADDR regRLC_SPM_SE_MUXSEL_ADDR; -typedef union RLC_SPM_SE_MUXSEL_ADDR_OFFSET regRLC_SPM_SE_MUXSEL_ADDR_OFFSET; -typedef union RLC_SPM_SE_MUXSEL_DATA regRLC_SPM_SE_MUXSEL_DATA; -typedef union RLC_SPM_SE_MUXSEL_SKEW regRLC_SPM_SE_MUXSEL_SKEW; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_SE_RSPM_REQ_DATA_HI regRLC_SPM_SE_RSPM_REQ_DATA_HI; -typedef union RLC_SPM_SE_RSPM_REQ_DATA_LO regRLC_SPM_SE_RSPM_REQ_DATA_LO; -typedef union RLC_SPM_SE_RSPM_REQ_OP regRLC_SPM_SE_RSPM_REQ_OP; -typedef union RLC_SPM_SE_RSPM_RET_DATA regRLC_SPM_SE_RSPM_RET_DATA; -typedef union RLC_SPM_SE_RSPM_RET_OP regRLC_SPM_SE_RSPM_RET_OP; -#endif -typedef union RLC_SPM_SE_SAMPLEDELAY_IND_ADDR regRLC_SPM_SE_SAMPLEDELAY_IND_ADDR; -typedef union RLC_SPM_SE_SAMPLEDELAY_IND_DATA regRLC_SPM_SE_SAMPLEDELAY_IND_DATA; -typedef union RLC_SPM_SE_SAMPLE_SKEW regRLC_SPM_SE_SAMPLE_SKEW; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_SPARE regRLC_SPM_SPARE; -#endif -typedef union RLC_SPM_SPI_PERFMON_SAMPLE_DELAY regRLC_SPM_SPI_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_SQG_PERFMON_SAMPLE_DELAY regRLC_SPM_SQG_PERFMON_SAMPLE_DELAY; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union RLC_SPM_STATUS regRLC_SPM_STATUS; -#endif -typedef union RLC_SPM_SX_PERFMON_SAMPLE_DELAY regRLC_SPM_SX_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_TA_PERFMON_SAMPLE_DELAY regRLC_SPM_TA_PERFMON_SAMPLE_DELAY; -typedef union 
RLC_SPM_TCA_PERFMON_SAMPLE_DELAY regRLC_SPM_TCA_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_TCC_PERFMON_SAMPLE_DELAY regRLC_SPM_TCC_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_TCP_PERFMON_SAMPLE_DELAY regRLC_SPM_TCP_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_TD_PERFMON_SAMPLE_DELAY regRLC_SPM_TD_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_THREAD_TRACE_CTRL regRLC_SPM_THREAD_TRACE_CTRL; -typedef union RLC_SPM_UTCL1_CNTL regRLC_SPM_UTCL1_CNTL; -typedef union RLC_SPM_UTCL1_ERROR_1 regRLC_SPM_UTCL1_ERROR_1; -typedef union RLC_SPM_UTCL1_ERROR_2 regRLC_SPM_UTCL1_ERROR_2; -typedef union RLC_SPM_VGT_PERFMON_SAMPLE_DELAY regRLC_SPM_VGT_PERFMON_SAMPLE_DELAY; -typedef union RLC_SPM_VIRT_CTRL regRLC_SPM_VIRT_CTRL; -typedef union RLC_SPM_VIRT_STATUS regRLC_SPM_VIRT_STATUS; -typedef union RMI_PERFCOUNTER0_HI regRMI_PERFCOUNTER0_HI; -typedef union RMI_PERFCOUNTER0_LO regRMI_PERFCOUNTER0_LO; -typedef union RMI_PERFCOUNTER0_SELECT regRMI_PERFCOUNTER0_SELECT; -typedef union RMI_PERFCOUNTER0_SELECT1 regRMI_PERFCOUNTER0_SELECT1; -typedef union RMI_PERFCOUNTER1_HI regRMI_PERFCOUNTER1_HI; -typedef union RMI_PERFCOUNTER1_LO regRMI_PERFCOUNTER1_LO; -typedef union RMI_PERFCOUNTER1_SELECT regRMI_PERFCOUNTER1_SELECT; -typedef union RMI_PERFCOUNTER2_HI regRMI_PERFCOUNTER2_HI; -typedef union RMI_PERFCOUNTER2_LO regRMI_PERFCOUNTER2_LO; -typedef union RMI_PERFCOUNTER2_SELECT regRMI_PERFCOUNTER2_SELECT; -typedef union RMI_PERFCOUNTER2_SELECT1 regRMI_PERFCOUNTER2_SELECT1; -typedef union RMI_PERFCOUNTER3_HI regRMI_PERFCOUNTER3_HI; -typedef union RMI_PERFCOUNTER3_LO regRMI_PERFCOUNTER3_LO; -typedef union RMI_PERFCOUNTER3_SELECT regRMI_PERFCOUNTER3_SELECT; -typedef union RMI_PERF_COUNTER_CNTL regRMI_PERF_COUNTER_CNTL; -typedef union RPB_PERFCOUNTER0_CFG regRPB_PERFCOUNTER0_CFG; -typedef union RPB_PERFCOUNTER1_CFG regRPB_PERFCOUNTER1_CFG; -typedef union RPB_PERFCOUNTER2_CFG regRPB_PERFCOUNTER2_CFG; -typedef union RPB_PERFCOUNTER3_CFG regRPB_PERFCOUNTER3_CFG; -typedef union RPB_PERFCOUNTER_HI 
regRPB_PERFCOUNTER_HI; -typedef union RPB_PERFCOUNTER_LO regRPB_PERFCOUNTER_LO; -typedef union RPB_PERFCOUNTER_RSLT_CNTL regRPB_PERFCOUNTER_RSLT_CNTL; -typedef union RPB_PERF_COUNTER_CNTL regRPB_PERF_COUNTER_CNTL; -typedef union SDMA0_PERFCNT_MISC_CNTL regSDMA0_PERFCNT_MISC_CNTL; -typedef union SDMA0_PERFCNT_PERFCOUNTER0_CFG regSDMA0_PERFCNT_PERFCOUNTER0_CFG; -typedef union SDMA0_PERFCNT_PERFCOUNTER1_CFG regSDMA0_PERFCNT_PERFCOUNTER1_CFG; -typedef union SDMA0_PERFCNT_PERFCOUNTER_HI regSDMA0_PERFCNT_PERFCOUNTER_HI; -typedef union SDMA0_PERFCNT_PERFCOUNTER_LO regSDMA0_PERFCNT_PERFCOUNTER_LO; -typedef union SDMA0_PERFCNT_PERFCOUNTER_RSLT_CNTL regSDMA0_PERFCNT_PERFCOUNTER_RSLT_CNTL; -typedef union SDMA0_PERFCOUNTER0_HI regSDMA0_PERFCOUNTER0_HI; -typedef union SDMA0_PERFCOUNTER0_LO regSDMA0_PERFCOUNTER0_LO; -typedef union SDMA0_PERFCOUNTER0_RESULT regSDMA0_PERFCOUNTER0_RESULT; -typedef union SDMA0_PERFCOUNTER0_SELECT regSDMA0_PERFCOUNTER0_SELECT; -typedef union SDMA0_PERFCOUNTER0_SELECT1 regSDMA0_PERFCOUNTER0_SELECT1; -typedef union SDMA0_PERFCOUNTER1_HI regSDMA0_PERFCOUNTER1_HI; -typedef union SDMA0_PERFCOUNTER1_LO regSDMA0_PERFCOUNTER1_LO; -typedef union SDMA0_PERFCOUNTER1_RESULT regSDMA0_PERFCOUNTER1_RESULT; -typedef union SDMA0_PERFCOUNTER1_SELECT regSDMA0_PERFCOUNTER1_SELECT; -typedef union SDMA0_PERFCOUNTER1_SELECT1 regSDMA0_PERFCOUNTER1_SELECT1; -typedef union SDMA0_PERFCOUNTER_TAG_DELAY_RANGE regSDMA0_PERFCOUNTER_TAG_DELAY_RANGE; -typedef union SDMA0_PERFMON_CNTL regSDMA0_PERFMON_CNTL; -#if CHIP_HDR_NAVI21 || CHIP_HDR_NAVI22 || CHIP_HDR_NAVI23 || CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 -typedef union SDMA1_PERFCNT_MISC_CNTL regSDMA1_PERFCNT_MISC_CNTL; -typedef union SDMA1_PERFCNT_PERFCOUNTER0_CFG regSDMA1_PERFCNT_PERFCOUNTER0_CFG; -typedef union SDMA1_PERFCNT_PERFCOUNTER1_CFG regSDMA1_PERFCNT_PERFCOUNTER1_CFG; -typedef union SDMA1_PERFCNT_PERFCOUNTER_HI regSDMA1_PERFCNT_PERFCOUNTER_HI; -typedef union SDMA1_PERFCNT_PERFCOUNTER_LO 
regSDMA1_PERFCNT_PERFCOUNTER_LO; -typedef union SDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL regSDMA1_PERFCNT_PERFCOUNTER_RSLT_CNTL; -#endif -typedef union SDMA1_PERFCOUNTER0_HI regSDMA1_PERFCOUNTER0_HI; -typedef union SDMA1_PERFCOUNTER0_LO regSDMA1_PERFCOUNTER0_LO; -typedef union SDMA1_PERFCOUNTER0_RESULT regSDMA1_PERFCOUNTER0_RESULT; -typedef union SDMA1_PERFCOUNTER0_SELECT regSDMA1_PERFCOUNTER0_SELECT; -typedef union SDMA1_PERFCOUNTER0_SELECT1 regSDMA1_PERFCOUNTER0_SELECT1; -typedef union SDMA1_PERFCOUNTER1_HI regSDMA1_PERFCOUNTER1_HI; -typedef union SDMA1_PERFCOUNTER1_LO regSDMA1_PERFCOUNTER1_LO; -typedef union SDMA1_PERFCOUNTER1_RESULT regSDMA1_PERFCOUNTER1_RESULT; -typedef union SDMA1_PERFCOUNTER1_SELECT regSDMA1_PERFCOUNTER1_SELECT; -typedef union SDMA1_PERFCOUNTER1_SELECT1 regSDMA1_PERFCOUNTER1_SELECT1; -typedef union SDMA1_PERFCOUNTER_TAG_DELAY_RANGE regSDMA1_PERFCOUNTER_TAG_DELAY_RANGE; -typedef union SDMA1_PERFMON_CNTL regSDMA1_PERFMON_CNTL; -#if CHIP_HDR_NAVI21 -typedef union SDMA2_PERFCNT_MISC_CNTL regSDMA2_PERFCNT_MISC_CNTL; -typedef union SDMA2_PERFCNT_PERFCOUNTER0_CFG regSDMA2_PERFCNT_PERFCOUNTER0_CFG; -typedef union SDMA2_PERFCNT_PERFCOUNTER1_CFG regSDMA2_PERFCNT_PERFCOUNTER1_CFG; -typedef union SDMA2_PERFCNT_PERFCOUNTER_HI regSDMA2_PERFCNT_PERFCOUNTER_HI; -typedef union SDMA2_PERFCNT_PERFCOUNTER_LO regSDMA2_PERFCNT_PERFCOUNTER_LO; -typedef union SDMA2_PERFCNT_PERFCOUNTER_RSLT_CNTL regSDMA2_PERFCNT_PERFCOUNTER_RSLT_CNTL; -typedef union SDMA2_PERFCOUNTER0_HI regSDMA2_PERFCOUNTER0_HI; -typedef union SDMA2_PERFCOUNTER0_LO regSDMA2_PERFCOUNTER0_LO; -typedef union SDMA2_PERFCOUNTER0_SELECT regSDMA2_PERFCOUNTER0_SELECT; -typedef union SDMA2_PERFCOUNTER0_SELECT1 regSDMA2_PERFCOUNTER0_SELECT1; -typedef union SDMA2_PERFCOUNTER1_HI regSDMA2_PERFCOUNTER1_HI; -typedef union SDMA2_PERFCOUNTER1_LO regSDMA2_PERFCOUNTER1_LO; -typedef union SDMA2_PERFCOUNTER1_SELECT regSDMA2_PERFCOUNTER1_SELECT; -typedef union SDMA2_PERFCOUNTER1_SELECT1 regSDMA2_PERFCOUNTER1_SELECT1; 
-typedef union SDMA3_PERFCNT_MISC_CNTL regSDMA3_PERFCNT_MISC_CNTL; -typedef union SDMA3_PERFCNT_PERFCOUNTER0_CFG regSDMA3_PERFCNT_PERFCOUNTER0_CFG; -typedef union SDMA3_PERFCNT_PERFCOUNTER1_CFG regSDMA3_PERFCNT_PERFCOUNTER1_CFG; -typedef union SDMA3_PERFCNT_PERFCOUNTER_HI regSDMA3_PERFCNT_PERFCOUNTER_HI; -typedef union SDMA3_PERFCNT_PERFCOUNTER_LO regSDMA3_PERFCNT_PERFCOUNTER_LO; -typedef union SDMA3_PERFCNT_PERFCOUNTER_RSLT_CNTL regSDMA3_PERFCNT_PERFCOUNTER_RSLT_CNTL; -typedef union SDMA3_PERFCOUNTER0_HI regSDMA3_PERFCOUNTER0_HI; -typedef union SDMA3_PERFCOUNTER0_LO regSDMA3_PERFCOUNTER0_LO; -typedef union SDMA3_PERFCOUNTER0_SELECT regSDMA3_PERFCOUNTER0_SELECT; -typedef union SDMA3_PERFCOUNTER0_SELECT1 regSDMA3_PERFCOUNTER0_SELECT1; -typedef union SDMA3_PERFCOUNTER1_HI regSDMA3_PERFCOUNTER1_HI; -typedef union SDMA3_PERFCOUNTER1_LO regSDMA3_PERFCOUNTER1_LO; -typedef union SDMA3_PERFCOUNTER1_SELECT regSDMA3_PERFCOUNTER1_SELECT; -typedef union SDMA3_PERFCOUNTER1_SELECT1 regSDMA3_PERFCOUNTER1_SELECT1; -#endif -typedef union SPI_ARB_CNTL_0 regSPI_ARB_CNTL_0; -typedef union SPI_ARB_CYCLES_0 regSPI_ARB_CYCLES_0; -typedef union SPI_ARB_CYCLES_1 regSPI_ARB_CYCLES_1; -typedef union SPI_ARB_PRIORITY regSPI_ARB_PRIORITY; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SPI_ATTRIBUTE_RING_BASE regSPI_ATTRIBUTE_RING_BASE; -typedef union SPI_ATTRIBUTE_RING_SIZE regSPI_ATTRIBUTE_RING_SIZE; -#endif -typedef union SPI_BARYC_CNTL regSPI_BARYC_CNTL; -typedef union SPI_BARYC_SSAA_CNTL regSPI_BARYC_SSAA_CNTL; -typedef union SPI_COMPUTE_QUEUE_RESET regSPI_COMPUTE_QUEUE_RESET; -typedef union SPI_COMPUTE_WF_CTX_SAVE regSPI_COMPUTE_WF_CTX_SAVE; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SPI_COMPUTE_WF_CTX_SAVE_STATUS regSPI_COMPUTE_WF_CTX_SAVE_STATUS; -#endif -typedef union SPI_CONFIG_CNTL regSPI_CONFIG_CNTL; -typedef union SPI_CONFIG_CNTL_1 regSPI_CONFIG_CNTL_1; -typedef union 
SPI_CONFIG_CNTL_1_REMAP regSPI_CONFIG_CNTL_1_REMAP; -typedef union SPI_CONFIG_CNTL_2 regSPI_CONFIG_CNTL_2; -typedef union SPI_CONFIG_CNTL_2_REMAP regSPI_CONFIG_CNTL_2_REMAP; -typedef union SPI_CONFIG_CNTL_REMAP regSPI_CONFIG_CNTL_REMAP; -typedef union SPI_CONFIG_PS_CU_EN regSPI_CONFIG_PS_CU_EN; -typedef union SPI_CSG_PIPE_CONTROL regSPI_CSG_PIPE_CONTROL; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_0 regSPI_CSQ_WF_ACTIVE_COUNT_0; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_1 regSPI_CSQ_WF_ACTIVE_COUNT_1; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_2 regSPI_CSQ_WF_ACTIVE_COUNT_2; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_3 regSPI_CSQ_WF_ACTIVE_COUNT_3; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_4 regSPI_CSQ_WF_ACTIVE_COUNT_4; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_5 regSPI_CSQ_WF_ACTIVE_COUNT_5; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_6 regSPI_CSQ_WF_ACTIVE_COUNT_6; -typedef union SPI_CSQ_WF_ACTIVE_COUNT_7 regSPI_CSQ_WF_ACTIVE_COUNT_7; -typedef union SPI_CSQ_WF_ACTIVE_STATUS regSPI_CSQ_WF_ACTIVE_STATUS; -typedef union SPI_CS_CRAWLER_CONFIG regSPI_CS_CRAWLER_CONFIG; -typedef union SPI_DSM_CNTL regSPI_DSM_CNTL; -typedef union SPI_DSM_CNTL2 regSPI_DSM_CNTL2; -typedef union SPI_EDC_CNT regSPI_EDC_CNT; -typedef union SPI_EXP_THROTTLE_CTRL regSPI_EXP_THROTTLE_CTRL; -typedef union SPI_FEATURE_CTRL regSPI_FEATURE_CTRL; -typedef union SPI_GDS_CREDITS regSPI_GDS_CREDITS; -typedef union SPI_GFX_CNTL regSPI_GFX_CNTL; -typedef union SPI_GFX_CRAWLER_CONFIG regSPI_GFX_CRAWLER_CONFIG; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SPI_GFX_SCRATCH_BASE_HI regSPI_GFX_SCRATCH_BASE_HI; -typedef union SPI_GFX_SCRATCH_BASE_LO regSPI_GFX_SCRATCH_BASE_LO; -typedef union SPI_GS_THROTTLE_CNTL1 regSPI_GS_THROTTLE_CNTL1; -typedef union SPI_GS_THROTTLE_CNTL2 regSPI_GS_THROTTLE_CNTL2; -#endif -typedef union SPI_IND_DATA regSPI_IND_DATA; -typedef union SPI_IND_INDEX regSPI_IND_INDEX; -typedef union SPI_INTERP_CONTROL_0 regSPI_INTERP_CONTROL_0; -typedef union SPI_LB_CTR_CTRL 
regSPI_LB_CTR_CTRL; -typedef union SPI_LB_CU_MASK regSPI_LB_CU_MASK; -typedef union SPI_LB_DATA_PERCU_WAVE_CS regSPI_LB_DATA_PERCU_WAVE_CS; -typedef union SPI_LB_DATA_PERCU_WAVE_HSGS regSPI_LB_DATA_PERCU_WAVE_HSGS; -typedef union SPI_LB_DATA_PERCU_WAVE_VSPS regSPI_LB_DATA_PERCU_WAVE_VSPS; -typedef union SPI_LB_DATA_PERWGP_WAVE_CS regSPI_LB_DATA_PERWGP_WAVE_CS; -typedef union SPI_LB_DATA_PERWGP_WAVE_HSGS regSPI_LB_DATA_PERWGP_WAVE_HSGS; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SPI_LB_DATA_PERWGP_WAVE_PS regSPI_LB_DATA_PERWGP_WAVE_PS; -#endif -typedef union SPI_LB_DATA_PERWGP_WAVE_VSPS regSPI_LB_DATA_PERWGP_WAVE_VSPS; -typedef union SPI_LB_DATA_REG regSPI_LB_DATA_REG; -typedef union SPI_LB_DATA_WAVES regSPI_LB_DATA_WAVES; -typedef union SPI_LB_WGP_MASK regSPI_LB_WGP_MASK; -typedef union SPI_P0_TRAP_SCREEN_GPR_MIN regSPI_P0_TRAP_SCREEN_GPR_MIN; -typedef union SPI_P0_TRAP_SCREEN_PSBA_HI regSPI_P0_TRAP_SCREEN_PSBA_HI; -typedef union SPI_P0_TRAP_SCREEN_PSBA_LO regSPI_P0_TRAP_SCREEN_PSBA_LO; -typedef union SPI_P0_TRAP_SCREEN_PSMA_HI regSPI_P0_TRAP_SCREEN_PSMA_HI; -typedef union SPI_P0_TRAP_SCREEN_PSMA_LO regSPI_P0_TRAP_SCREEN_PSMA_LO; -typedef union SPI_P1_TRAP_SCREEN_GPR_MIN regSPI_P1_TRAP_SCREEN_GPR_MIN; -typedef union SPI_P1_TRAP_SCREEN_PSBA_HI regSPI_P1_TRAP_SCREEN_PSBA_HI; -typedef union SPI_P1_TRAP_SCREEN_PSBA_LO regSPI_P1_TRAP_SCREEN_PSBA_LO; -typedef union SPI_P1_TRAP_SCREEN_PSMA_HI regSPI_P1_TRAP_SCREEN_PSMA_HI; -typedef union SPI_P1_TRAP_SCREEN_PSMA_LO regSPI_P1_TRAP_SCREEN_PSMA_LO; -typedef union SPI_PERFCOUNTER0_HI regSPI_PERFCOUNTER0_HI; -typedef union SPI_PERFCOUNTER0_LO regSPI_PERFCOUNTER0_LO; -typedef union SPI_PERFCOUNTER0_SELECT regSPI_PERFCOUNTER0_SELECT; -typedef union SPI_PERFCOUNTER0_SELECT1 regSPI_PERFCOUNTER0_SELECT1; -typedef union SPI_PERFCOUNTER1_HI regSPI_PERFCOUNTER1_HI; -typedef union SPI_PERFCOUNTER1_LO regSPI_PERFCOUNTER1_LO; -typedef union SPI_PERFCOUNTER1_SELECT 
regSPI_PERFCOUNTER1_SELECT; -typedef union SPI_PERFCOUNTER1_SELECT1 regSPI_PERFCOUNTER1_SELECT1; -typedef union SPI_PERFCOUNTER2_HI regSPI_PERFCOUNTER2_HI; -typedef union SPI_PERFCOUNTER2_LO regSPI_PERFCOUNTER2_LO; -typedef union SPI_PERFCOUNTER2_SELECT regSPI_PERFCOUNTER2_SELECT; -typedef union SPI_PERFCOUNTER2_SELECT1 regSPI_PERFCOUNTER2_SELECT1; -typedef union SPI_PERFCOUNTER3_HI regSPI_PERFCOUNTER3_HI; -typedef union SPI_PERFCOUNTER3_LO regSPI_PERFCOUNTER3_LO; -typedef union SPI_PERFCOUNTER3_SELECT regSPI_PERFCOUNTER3_SELECT; -typedef union SPI_PERFCOUNTER3_SELECT1 regSPI_PERFCOUNTER3_SELECT1; -typedef union SPI_PERFCOUNTER4_HI regSPI_PERFCOUNTER4_HI; -typedef union SPI_PERFCOUNTER4_LO regSPI_PERFCOUNTER4_LO; -typedef union SPI_PERFCOUNTER4_SELECT regSPI_PERFCOUNTER4_SELECT; -typedef union SPI_PERFCOUNTER5_HI regSPI_PERFCOUNTER5_HI; -typedef union SPI_PERFCOUNTER5_LO regSPI_PERFCOUNTER5_LO; -typedef union SPI_PERFCOUNTER5_SELECT regSPI_PERFCOUNTER5_SELECT; -typedef union SPI_PERFCOUNTER_BINS regSPI_PERFCOUNTER_BINS; -typedef union SPI_PG_ENABLE_STATIC_CU_MASK regSPI_PG_ENABLE_STATIC_CU_MASK; -typedef union SPI_PG_ENABLE_STATIC_WGP_MASK regSPI_PG_ENABLE_STATIC_WGP_MASK; -typedef union SPI_PQEV_CTRL regSPI_PQEV_CTRL; -typedef union SPI_PS_INPUT_ADDR regSPI_PS_INPUT_ADDR; -typedef union SPI_PS_INPUT_CNTL_0 regSPI_PS_INPUT_CNTL_0; -typedef union SPI_PS_INPUT_CNTL_1 regSPI_PS_INPUT_CNTL_1; -typedef union SPI_PS_INPUT_CNTL_2 regSPI_PS_INPUT_CNTL_2; -typedef union SPI_PS_INPUT_CNTL_3 regSPI_PS_INPUT_CNTL_3; -typedef union SPI_PS_INPUT_CNTL_4 regSPI_PS_INPUT_CNTL_4; -typedef union SPI_PS_INPUT_CNTL_5 regSPI_PS_INPUT_CNTL_5; -typedef union SPI_PS_INPUT_CNTL_6 regSPI_PS_INPUT_CNTL_6; -typedef union SPI_PS_INPUT_CNTL_7 regSPI_PS_INPUT_CNTL_7; -typedef union SPI_PS_INPUT_CNTL_8 regSPI_PS_INPUT_CNTL_8; -typedef union SPI_PS_INPUT_CNTL_9 regSPI_PS_INPUT_CNTL_9; -typedef union SPI_PS_INPUT_CNTL_10 regSPI_PS_INPUT_CNTL_10; -typedef union SPI_PS_INPUT_CNTL_11 
regSPI_PS_INPUT_CNTL_11; -typedef union SPI_PS_INPUT_CNTL_12 regSPI_PS_INPUT_CNTL_12; -typedef union SPI_PS_INPUT_CNTL_13 regSPI_PS_INPUT_CNTL_13; -typedef union SPI_PS_INPUT_CNTL_14 regSPI_PS_INPUT_CNTL_14; -typedef union SPI_PS_INPUT_CNTL_15 regSPI_PS_INPUT_CNTL_15; -typedef union SPI_PS_INPUT_CNTL_16 regSPI_PS_INPUT_CNTL_16; -typedef union SPI_PS_INPUT_CNTL_17 regSPI_PS_INPUT_CNTL_17; -typedef union SPI_PS_INPUT_CNTL_18 regSPI_PS_INPUT_CNTL_18; -typedef union SPI_PS_INPUT_CNTL_19 regSPI_PS_INPUT_CNTL_19; -typedef union SPI_PS_INPUT_CNTL_20 regSPI_PS_INPUT_CNTL_20; -typedef union SPI_PS_INPUT_CNTL_21 regSPI_PS_INPUT_CNTL_21; -typedef union SPI_PS_INPUT_CNTL_22 regSPI_PS_INPUT_CNTL_22; -typedef union SPI_PS_INPUT_CNTL_23 regSPI_PS_INPUT_CNTL_23; -typedef union SPI_PS_INPUT_CNTL_24 regSPI_PS_INPUT_CNTL_24; -typedef union SPI_PS_INPUT_CNTL_25 regSPI_PS_INPUT_CNTL_25; -typedef union SPI_PS_INPUT_CNTL_26 regSPI_PS_INPUT_CNTL_26; -typedef union SPI_PS_INPUT_CNTL_27 regSPI_PS_INPUT_CNTL_27; -typedef union SPI_PS_INPUT_CNTL_28 regSPI_PS_INPUT_CNTL_28; -typedef union SPI_PS_INPUT_CNTL_29 regSPI_PS_INPUT_CNTL_29; -typedef union SPI_PS_INPUT_CNTL_30 regSPI_PS_INPUT_CNTL_30; -typedef union SPI_PS_INPUT_CNTL_31 regSPI_PS_INPUT_CNTL_31; -typedef union SPI_PS_INPUT_ENA regSPI_PS_INPUT_ENA; -typedef union SPI_PS_IN_CONTROL regSPI_PS_IN_CONTROL; -typedef union SPI_PS_MAX_WAVE_ID regSPI_PS_MAX_WAVE_ID; -typedef union SPI_RESOURCE_RESERVE_CU_0 regSPI_RESOURCE_RESERVE_CU_0; -typedef union SPI_RESOURCE_RESERVE_CU_1 regSPI_RESOURCE_RESERVE_CU_1; -typedef union SPI_RESOURCE_RESERVE_CU_2 regSPI_RESOURCE_RESERVE_CU_2; -typedef union SPI_RESOURCE_RESERVE_CU_3 regSPI_RESOURCE_RESERVE_CU_3; -typedef union SPI_RESOURCE_RESERVE_CU_4 regSPI_RESOURCE_RESERVE_CU_4; -typedef union SPI_RESOURCE_RESERVE_CU_5 regSPI_RESOURCE_RESERVE_CU_5; -typedef union SPI_RESOURCE_RESERVE_CU_6 regSPI_RESOURCE_RESERVE_CU_6; -typedef union SPI_RESOURCE_RESERVE_CU_7 regSPI_RESOURCE_RESERVE_CU_7; -typedef union 
SPI_RESOURCE_RESERVE_CU_8 regSPI_RESOURCE_RESERVE_CU_8; -typedef union SPI_RESOURCE_RESERVE_CU_9 regSPI_RESOURCE_RESERVE_CU_9; -typedef union SPI_RESOURCE_RESERVE_CU_10 regSPI_RESOURCE_RESERVE_CU_10; -typedef union SPI_RESOURCE_RESERVE_CU_11 regSPI_RESOURCE_RESERVE_CU_11; -typedef union SPI_RESOURCE_RESERVE_CU_12 regSPI_RESOURCE_RESERVE_CU_12; -typedef union SPI_RESOURCE_RESERVE_CU_13 regSPI_RESOURCE_RESERVE_CU_13; -typedef union SPI_RESOURCE_RESERVE_CU_14 regSPI_RESOURCE_RESERVE_CU_14; -typedef union SPI_RESOURCE_RESERVE_CU_15 regSPI_RESOURCE_RESERVE_CU_15; -typedef union SPI_RESOURCE_RESERVE_EN_CU_0 regSPI_RESOURCE_RESERVE_EN_CU_0; -typedef union SPI_RESOURCE_RESERVE_EN_CU_1 regSPI_RESOURCE_RESERVE_EN_CU_1; -typedef union SPI_RESOURCE_RESERVE_EN_CU_2 regSPI_RESOURCE_RESERVE_EN_CU_2; -typedef union SPI_RESOURCE_RESERVE_EN_CU_3 regSPI_RESOURCE_RESERVE_EN_CU_3; -typedef union SPI_RESOURCE_RESERVE_EN_CU_4 regSPI_RESOURCE_RESERVE_EN_CU_4; -typedef union SPI_RESOURCE_RESERVE_EN_CU_5 regSPI_RESOURCE_RESERVE_EN_CU_5; -typedef union SPI_RESOURCE_RESERVE_EN_CU_6 regSPI_RESOURCE_RESERVE_EN_CU_6; -typedef union SPI_RESOURCE_RESERVE_EN_CU_7 regSPI_RESOURCE_RESERVE_EN_CU_7; -typedef union SPI_RESOURCE_RESERVE_EN_CU_8 regSPI_RESOURCE_RESERVE_EN_CU_8; -typedef union SPI_RESOURCE_RESERVE_EN_CU_9 regSPI_RESOURCE_RESERVE_EN_CU_9; -typedef union SPI_RESOURCE_RESERVE_EN_CU_10 regSPI_RESOURCE_RESERVE_EN_CU_10; -typedef union SPI_RESOURCE_RESERVE_EN_CU_11 regSPI_RESOURCE_RESERVE_EN_CU_11; -typedef union SPI_RESOURCE_RESERVE_EN_CU_12 regSPI_RESOURCE_RESERVE_EN_CU_12; -typedef union SPI_RESOURCE_RESERVE_EN_CU_13 regSPI_RESOURCE_RESERVE_EN_CU_13; -typedef union SPI_RESOURCE_RESERVE_EN_CU_14 regSPI_RESOURCE_RESERVE_EN_CU_14; -typedef union SPI_RESOURCE_RESERVE_EN_CU_15 regSPI_RESOURCE_RESERVE_EN_CU_15; -typedef union SPI_SHADER_COL_FORMAT regSPI_SHADER_COL_FORMAT; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SPI_SHADER_GS_MESHLET_DIM 
regSPI_SHADER_GS_MESHLET_DIM; -typedef union SPI_SHADER_GS_MESHLET_EXP_ALLOC regSPI_SHADER_GS_MESHLET_EXP_ALLOC; -#endif -typedef union SPI_SHADER_IDX_FORMAT regSPI_SHADER_IDX_FORMAT; -typedef union SPI_SHADER_LATE_ALLOC_VS regSPI_SHADER_LATE_ALLOC_VS; -typedef union SPI_SHADER_PGM_CHKSUM_GS regSPI_SHADER_PGM_CHKSUM_GS; -typedef union SPI_SHADER_PGM_CHKSUM_HS regSPI_SHADER_PGM_CHKSUM_HS; -typedef union SPI_SHADER_PGM_CHKSUM_PS regSPI_SHADER_PGM_CHKSUM_PS; -typedef union SPI_SHADER_PGM_CHKSUM_VS regSPI_SHADER_PGM_CHKSUM_VS; -typedef union SPI_SHADER_PGM_HI_ES regSPI_SHADER_PGM_HI_ES; -typedef union SPI_SHADER_PGM_HI_ES_GS regSPI_SHADER_PGM_HI_ES_GS; -typedef union SPI_SHADER_PGM_HI_GS regSPI_SHADER_PGM_HI_GS; -typedef union SPI_SHADER_PGM_HI_HS regSPI_SHADER_PGM_HI_HS; -typedef union SPI_SHADER_PGM_HI_LS regSPI_SHADER_PGM_HI_LS; -typedef union SPI_SHADER_PGM_HI_LS_HS regSPI_SHADER_PGM_HI_LS_HS; -typedef union SPI_SHADER_PGM_HI_PS regSPI_SHADER_PGM_HI_PS; -typedef union SPI_SHADER_PGM_HI_VS regSPI_SHADER_PGM_HI_VS; -typedef union SPI_SHADER_PGM_LO_ES regSPI_SHADER_PGM_LO_ES; -typedef union SPI_SHADER_PGM_LO_ES_GS regSPI_SHADER_PGM_LO_ES_GS; -typedef union SPI_SHADER_PGM_LO_GS regSPI_SHADER_PGM_LO_GS; -typedef union SPI_SHADER_PGM_LO_HS regSPI_SHADER_PGM_LO_HS; -typedef union SPI_SHADER_PGM_LO_LS regSPI_SHADER_PGM_LO_LS; -typedef union SPI_SHADER_PGM_LO_LS_HS regSPI_SHADER_PGM_LO_LS_HS; -typedef union SPI_SHADER_PGM_LO_PS regSPI_SHADER_PGM_LO_PS; -typedef union SPI_SHADER_PGM_LO_VS regSPI_SHADER_PGM_LO_VS; -typedef union SPI_SHADER_PGM_RSRC1_ES regSPI_SHADER_PGM_RSRC1_ES; -typedef union SPI_SHADER_PGM_RSRC1_GS regSPI_SHADER_PGM_RSRC1_GS; -typedef union SPI_SHADER_PGM_RSRC1_HS regSPI_SHADER_PGM_RSRC1_HS; -typedef union SPI_SHADER_PGM_RSRC1_LS regSPI_SHADER_PGM_RSRC1_LS; -typedef union SPI_SHADER_PGM_RSRC1_PS regSPI_SHADER_PGM_RSRC1_PS; -typedef union SPI_SHADER_PGM_RSRC1_VS regSPI_SHADER_PGM_RSRC1_VS; -typedef union SPI_SHADER_PGM_RSRC2_ES regSPI_SHADER_PGM_RSRC2_ES; 
-typedef union SPI_SHADER_PGM_RSRC2_ES_GS regSPI_SHADER_PGM_RSRC2_ES_GS; -typedef union SPI_SHADER_PGM_RSRC2_ES_VS regSPI_SHADER_PGM_RSRC2_ES_VS; -typedef union SPI_SHADER_PGM_RSRC2_GS regSPI_SHADER_PGM_RSRC2_GS; -typedef union SPI_SHADER_PGM_RSRC2_GS_VS regSPI_SHADER_PGM_RSRC2_GS_VS; -typedef union SPI_SHADER_PGM_RSRC2_HS regSPI_SHADER_PGM_RSRC2_HS; -typedef union SPI_SHADER_PGM_RSRC2_LS regSPI_SHADER_PGM_RSRC2_LS; -typedef union SPI_SHADER_PGM_RSRC2_LS_ES regSPI_SHADER_PGM_RSRC2_LS_ES; -typedef union SPI_SHADER_PGM_RSRC2_LS_HS regSPI_SHADER_PGM_RSRC2_LS_HS; -typedef union SPI_SHADER_PGM_RSRC2_LS_VS regSPI_SHADER_PGM_RSRC2_LS_VS; -typedef union SPI_SHADER_PGM_RSRC2_PS regSPI_SHADER_PGM_RSRC2_PS; -typedef union SPI_SHADER_PGM_RSRC2_VS regSPI_SHADER_PGM_RSRC2_VS; -typedef union SPI_SHADER_PGM_RSRC3_ES regSPI_SHADER_PGM_RSRC3_ES; -typedef union SPI_SHADER_PGM_RSRC3_GS regSPI_SHADER_PGM_RSRC3_GS; -typedef union SPI_SHADER_PGM_RSRC3_HS regSPI_SHADER_PGM_RSRC3_HS; -typedef union SPI_SHADER_PGM_RSRC3_LS regSPI_SHADER_PGM_RSRC3_LS; -typedef union SPI_SHADER_PGM_RSRC3_PS regSPI_SHADER_PGM_RSRC3_PS; -typedef union SPI_SHADER_PGM_RSRC3_VS regSPI_SHADER_PGM_RSRC3_VS; -typedef union SPI_SHADER_PGM_RSRC4_GS regSPI_SHADER_PGM_RSRC4_GS; -typedef union SPI_SHADER_PGM_RSRC4_HS regSPI_SHADER_PGM_RSRC4_HS; -typedef union SPI_SHADER_PGM_RSRC4_PS regSPI_SHADER_PGM_RSRC4_PS; -typedef union SPI_SHADER_PGM_RSRC4_VS regSPI_SHADER_PGM_RSRC4_VS; -typedef union SPI_SHADER_POS_FORMAT regSPI_SHADER_POS_FORMAT; -typedef union SPI_SHADER_PREF_PRI_ACCUM_ESGS_0 regSPI_SHADER_PREF_PRI_ACCUM_ESGS_0; -typedef union SPI_SHADER_PREF_PRI_ACCUM_ESGS_1 regSPI_SHADER_PREF_PRI_ACCUM_ESGS_1; -typedef union SPI_SHADER_PREF_PRI_ACCUM_ESGS_2 regSPI_SHADER_PREF_PRI_ACCUM_ESGS_2; -typedef union SPI_SHADER_PREF_PRI_ACCUM_ESGS_3 regSPI_SHADER_PREF_PRI_ACCUM_ESGS_3; -typedef union SPI_SHADER_PREF_PRI_ACCUM_LSHS_0 regSPI_SHADER_PREF_PRI_ACCUM_LSHS_0; -typedef union SPI_SHADER_PREF_PRI_ACCUM_LSHS_1 
regSPI_SHADER_PREF_PRI_ACCUM_LSHS_1; -typedef union SPI_SHADER_PREF_PRI_ACCUM_LSHS_2 regSPI_SHADER_PREF_PRI_ACCUM_LSHS_2; -typedef union SPI_SHADER_PREF_PRI_ACCUM_LSHS_3 regSPI_SHADER_PREF_PRI_ACCUM_LSHS_3; -typedef union SPI_SHADER_PREF_PRI_ACCUM_PS_0 regSPI_SHADER_PREF_PRI_ACCUM_PS_0; -typedef union SPI_SHADER_PREF_PRI_ACCUM_PS_1 regSPI_SHADER_PREF_PRI_ACCUM_PS_1; -typedef union SPI_SHADER_PREF_PRI_ACCUM_PS_2 regSPI_SHADER_PREF_PRI_ACCUM_PS_2; -typedef union SPI_SHADER_PREF_PRI_ACCUM_PS_3 regSPI_SHADER_PREF_PRI_ACCUM_PS_3; -typedef union SPI_SHADER_PREF_PRI_ACCUM_VS_0 regSPI_SHADER_PREF_PRI_ACCUM_VS_0; -typedef union SPI_SHADER_PREF_PRI_ACCUM_VS_1 regSPI_SHADER_PREF_PRI_ACCUM_VS_1; -typedef union SPI_SHADER_PREF_PRI_ACCUM_VS_2 regSPI_SHADER_PREF_PRI_ACCUM_VS_2; -typedef union SPI_SHADER_PREF_PRI_ACCUM_VS_3 regSPI_SHADER_PREF_PRI_ACCUM_VS_3; -typedef union SPI_SHADER_PREF_PRI_CNTR_CTRL_ESGS regSPI_SHADER_PREF_PRI_CNTR_CTRL_ESGS; -typedef union SPI_SHADER_PREF_PRI_CNTR_CTRL_LSHS regSPI_SHADER_PREF_PRI_CNTR_CTRL_LSHS; -typedef union SPI_SHADER_PREF_PRI_CNTR_CTRL_PS regSPI_SHADER_PREF_PRI_CNTR_CTRL_PS; -typedef union SPI_SHADER_PREF_PRI_CNTR_CTRL_VS regSPI_SHADER_PREF_PRI_CNTR_CTRL_VS; -typedef union SPI_SHADER_REQ_CTRL_ESGS regSPI_SHADER_REQ_CTRL_ESGS; -typedef union SPI_SHADER_REQ_CTRL_LSHS regSPI_SHADER_REQ_CTRL_LSHS; -typedef union SPI_SHADER_REQ_CTRL_PS regSPI_SHADER_REQ_CTRL_PS; -typedef union SPI_SHADER_REQ_CTRL_VS regSPI_SHADER_REQ_CTRL_VS; -typedef union SPI_SHADER_RSRC_LIMIT_CTRL regSPI_SHADER_RSRC_LIMIT_CTRL; -typedef union SPI_SHADER_USER_ACCUM_ESGS_0 regSPI_SHADER_USER_ACCUM_ESGS_0; -typedef union SPI_SHADER_USER_ACCUM_ESGS_1 regSPI_SHADER_USER_ACCUM_ESGS_1; -typedef union SPI_SHADER_USER_ACCUM_ESGS_2 regSPI_SHADER_USER_ACCUM_ESGS_2; -typedef union SPI_SHADER_USER_ACCUM_ESGS_3 regSPI_SHADER_USER_ACCUM_ESGS_3; -typedef union SPI_SHADER_USER_ACCUM_LSHS_0 regSPI_SHADER_USER_ACCUM_LSHS_0; -typedef union SPI_SHADER_USER_ACCUM_LSHS_1 
regSPI_SHADER_USER_ACCUM_LSHS_1; -typedef union SPI_SHADER_USER_ACCUM_LSHS_2 regSPI_SHADER_USER_ACCUM_LSHS_2; -typedef union SPI_SHADER_USER_ACCUM_LSHS_3 regSPI_SHADER_USER_ACCUM_LSHS_3; -typedef union SPI_SHADER_USER_ACCUM_PS_0 regSPI_SHADER_USER_ACCUM_PS_0; -typedef union SPI_SHADER_USER_ACCUM_PS_1 regSPI_SHADER_USER_ACCUM_PS_1; -typedef union SPI_SHADER_USER_ACCUM_PS_2 regSPI_SHADER_USER_ACCUM_PS_2; -typedef union SPI_SHADER_USER_ACCUM_PS_3 regSPI_SHADER_USER_ACCUM_PS_3; -typedef union SPI_SHADER_USER_ACCUM_VS_0 regSPI_SHADER_USER_ACCUM_VS_0; -typedef union SPI_SHADER_USER_ACCUM_VS_1 regSPI_SHADER_USER_ACCUM_VS_1; -typedef union SPI_SHADER_USER_ACCUM_VS_2 regSPI_SHADER_USER_ACCUM_VS_2; -typedef union SPI_SHADER_USER_ACCUM_VS_3 regSPI_SHADER_USER_ACCUM_VS_3; -typedef union SPI_SHADER_USER_DATA_ADDR_HI_GS regSPI_SHADER_USER_DATA_ADDR_HI_GS; -typedef union SPI_SHADER_USER_DATA_ADDR_HI_HS regSPI_SHADER_USER_DATA_ADDR_HI_HS; -typedef union SPI_SHADER_USER_DATA_ADDR_LO_GS regSPI_SHADER_USER_DATA_ADDR_LO_GS; -typedef union SPI_SHADER_USER_DATA_ADDR_LO_HS regSPI_SHADER_USER_DATA_ADDR_LO_HS; -typedef union SPI_SHADER_USER_DATA_COMMON_0 regSPI_SHADER_USER_DATA_COMMON_0; -typedef union SPI_SHADER_USER_DATA_COMMON_1 regSPI_SHADER_USER_DATA_COMMON_1; -typedef union SPI_SHADER_USER_DATA_COMMON_2 regSPI_SHADER_USER_DATA_COMMON_2; -typedef union SPI_SHADER_USER_DATA_COMMON_3 regSPI_SHADER_USER_DATA_COMMON_3; -typedef union SPI_SHADER_USER_DATA_COMMON_4 regSPI_SHADER_USER_DATA_COMMON_4; -typedef union SPI_SHADER_USER_DATA_COMMON_5 regSPI_SHADER_USER_DATA_COMMON_5; -typedef union SPI_SHADER_USER_DATA_COMMON_6 regSPI_SHADER_USER_DATA_COMMON_6; -typedef union SPI_SHADER_USER_DATA_COMMON_7 regSPI_SHADER_USER_DATA_COMMON_7; -typedef union SPI_SHADER_USER_DATA_COMMON_8 regSPI_SHADER_USER_DATA_COMMON_8; -typedef union SPI_SHADER_USER_DATA_COMMON_9 regSPI_SHADER_USER_DATA_COMMON_9; -typedef union SPI_SHADER_USER_DATA_COMMON_10 regSPI_SHADER_USER_DATA_COMMON_10; -typedef union 
SPI_SHADER_USER_DATA_COMMON_11 regSPI_SHADER_USER_DATA_COMMON_11; -typedef union SPI_SHADER_USER_DATA_COMMON_12 regSPI_SHADER_USER_DATA_COMMON_12; -typedef union SPI_SHADER_USER_DATA_COMMON_13 regSPI_SHADER_USER_DATA_COMMON_13; -typedef union SPI_SHADER_USER_DATA_COMMON_14 regSPI_SHADER_USER_DATA_COMMON_14; -typedef union SPI_SHADER_USER_DATA_COMMON_15 regSPI_SHADER_USER_DATA_COMMON_15; -typedef union SPI_SHADER_USER_DATA_COMMON_16 regSPI_SHADER_USER_DATA_COMMON_16; -typedef union SPI_SHADER_USER_DATA_COMMON_17 regSPI_SHADER_USER_DATA_COMMON_17; -typedef union SPI_SHADER_USER_DATA_COMMON_18 regSPI_SHADER_USER_DATA_COMMON_18; -typedef union SPI_SHADER_USER_DATA_COMMON_19 regSPI_SHADER_USER_DATA_COMMON_19; -typedef union SPI_SHADER_USER_DATA_COMMON_20 regSPI_SHADER_USER_DATA_COMMON_20; -typedef union SPI_SHADER_USER_DATA_COMMON_21 regSPI_SHADER_USER_DATA_COMMON_21; -typedef union SPI_SHADER_USER_DATA_COMMON_22 regSPI_SHADER_USER_DATA_COMMON_22; -typedef union SPI_SHADER_USER_DATA_COMMON_23 regSPI_SHADER_USER_DATA_COMMON_23; -typedef union SPI_SHADER_USER_DATA_COMMON_24 regSPI_SHADER_USER_DATA_COMMON_24; -typedef union SPI_SHADER_USER_DATA_COMMON_25 regSPI_SHADER_USER_DATA_COMMON_25; -typedef union SPI_SHADER_USER_DATA_COMMON_26 regSPI_SHADER_USER_DATA_COMMON_26; -typedef union SPI_SHADER_USER_DATA_COMMON_27 regSPI_SHADER_USER_DATA_COMMON_27; -typedef union SPI_SHADER_USER_DATA_COMMON_28 regSPI_SHADER_USER_DATA_COMMON_28; -typedef union SPI_SHADER_USER_DATA_COMMON_29 regSPI_SHADER_USER_DATA_COMMON_29; -typedef union SPI_SHADER_USER_DATA_COMMON_30 regSPI_SHADER_USER_DATA_COMMON_30; -typedef union SPI_SHADER_USER_DATA_COMMON_31 regSPI_SHADER_USER_DATA_COMMON_31; -typedef union SPI_SHADER_USER_DATA_ES_0 regSPI_SHADER_USER_DATA_ES_0; -typedef union SPI_SHADER_USER_DATA_ES_1 regSPI_SHADER_USER_DATA_ES_1; -typedef union SPI_SHADER_USER_DATA_ES_2 regSPI_SHADER_USER_DATA_ES_2; -typedef union SPI_SHADER_USER_DATA_ES_3 regSPI_SHADER_USER_DATA_ES_3; -typedef union 
SPI_SHADER_USER_DATA_ES_4 regSPI_SHADER_USER_DATA_ES_4; -typedef union SPI_SHADER_USER_DATA_ES_5 regSPI_SHADER_USER_DATA_ES_5; -typedef union SPI_SHADER_USER_DATA_ES_6 regSPI_SHADER_USER_DATA_ES_6; -typedef union SPI_SHADER_USER_DATA_ES_7 regSPI_SHADER_USER_DATA_ES_7; -typedef union SPI_SHADER_USER_DATA_ES_8 regSPI_SHADER_USER_DATA_ES_8; -typedef union SPI_SHADER_USER_DATA_ES_9 regSPI_SHADER_USER_DATA_ES_9; -typedef union SPI_SHADER_USER_DATA_ES_10 regSPI_SHADER_USER_DATA_ES_10; -typedef union SPI_SHADER_USER_DATA_ES_11 regSPI_SHADER_USER_DATA_ES_11; -typedef union SPI_SHADER_USER_DATA_ES_12 regSPI_SHADER_USER_DATA_ES_12; -typedef union SPI_SHADER_USER_DATA_ES_13 regSPI_SHADER_USER_DATA_ES_13; -typedef union SPI_SHADER_USER_DATA_ES_14 regSPI_SHADER_USER_DATA_ES_14; -typedef union SPI_SHADER_USER_DATA_ES_15 regSPI_SHADER_USER_DATA_ES_15; -typedef union SPI_SHADER_USER_DATA_ES_16 regSPI_SHADER_USER_DATA_ES_16; -typedef union SPI_SHADER_USER_DATA_ES_17 regSPI_SHADER_USER_DATA_ES_17; -typedef union SPI_SHADER_USER_DATA_ES_18 regSPI_SHADER_USER_DATA_ES_18; -typedef union SPI_SHADER_USER_DATA_ES_19 regSPI_SHADER_USER_DATA_ES_19; -typedef union SPI_SHADER_USER_DATA_ES_20 regSPI_SHADER_USER_DATA_ES_20; -typedef union SPI_SHADER_USER_DATA_ES_21 regSPI_SHADER_USER_DATA_ES_21; -typedef union SPI_SHADER_USER_DATA_ES_22 regSPI_SHADER_USER_DATA_ES_22; -typedef union SPI_SHADER_USER_DATA_ES_23 regSPI_SHADER_USER_DATA_ES_23; -typedef union SPI_SHADER_USER_DATA_ES_24 regSPI_SHADER_USER_DATA_ES_24; -typedef union SPI_SHADER_USER_DATA_ES_25 regSPI_SHADER_USER_DATA_ES_25; -typedef union SPI_SHADER_USER_DATA_ES_26 regSPI_SHADER_USER_DATA_ES_26; -typedef union SPI_SHADER_USER_DATA_ES_27 regSPI_SHADER_USER_DATA_ES_27; -typedef union SPI_SHADER_USER_DATA_ES_28 regSPI_SHADER_USER_DATA_ES_28; -typedef union SPI_SHADER_USER_DATA_ES_29 regSPI_SHADER_USER_DATA_ES_29; -typedef union SPI_SHADER_USER_DATA_ES_30 regSPI_SHADER_USER_DATA_ES_30; -typedef union SPI_SHADER_USER_DATA_ES_31 
regSPI_SHADER_USER_DATA_ES_31; -typedef union SPI_SHADER_USER_DATA_GS_0 regSPI_SHADER_USER_DATA_GS_0; -typedef union SPI_SHADER_USER_DATA_GS_1 regSPI_SHADER_USER_DATA_GS_1; -typedef union SPI_SHADER_USER_DATA_GS_2 regSPI_SHADER_USER_DATA_GS_2; -typedef union SPI_SHADER_USER_DATA_GS_3 regSPI_SHADER_USER_DATA_GS_3; -typedef union SPI_SHADER_USER_DATA_GS_4 regSPI_SHADER_USER_DATA_GS_4; -typedef union SPI_SHADER_USER_DATA_GS_5 regSPI_SHADER_USER_DATA_GS_5; -typedef union SPI_SHADER_USER_DATA_GS_6 regSPI_SHADER_USER_DATA_GS_6; -typedef union SPI_SHADER_USER_DATA_GS_7 regSPI_SHADER_USER_DATA_GS_7; -typedef union SPI_SHADER_USER_DATA_GS_8 regSPI_SHADER_USER_DATA_GS_8; -typedef union SPI_SHADER_USER_DATA_GS_9 regSPI_SHADER_USER_DATA_GS_9; -typedef union SPI_SHADER_USER_DATA_GS_10 regSPI_SHADER_USER_DATA_GS_10; -typedef union SPI_SHADER_USER_DATA_GS_11 regSPI_SHADER_USER_DATA_GS_11; -typedef union SPI_SHADER_USER_DATA_GS_12 regSPI_SHADER_USER_DATA_GS_12; -typedef union SPI_SHADER_USER_DATA_GS_13 regSPI_SHADER_USER_DATA_GS_13; -typedef union SPI_SHADER_USER_DATA_GS_14 regSPI_SHADER_USER_DATA_GS_14; -typedef union SPI_SHADER_USER_DATA_GS_15 regSPI_SHADER_USER_DATA_GS_15; -typedef union SPI_SHADER_USER_DATA_GS_16 regSPI_SHADER_USER_DATA_GS_16; -typedef union SPI_SHADER_USER_DATA_GS_17 regSPI_SHADER_USER_DATA_GS_17; -typedef union SPI_SHADER_USER_DATA_GS_18 regSPI_SHADER_USER_DATA_GS_18; -typedef union SPI_SHADER_USER_DATA_GS_19 regSPI_SHADER_USER_DATA_GS_19; -typedef union SPI_SHADER_USER_DATA_GS_20 regSPI_SHADER_USER_DATA_GS_20; -typedef union SPI_SHADER_USER_DATA_GS_21 regSPI_SHADER_USER_DATA_GS_21; -typedef union SPI_SHADER_USER_DATA_GS_22 regSPI_SHADER_USER_DATA_GS_22; -typedef union SPI_SHADER_USER_DATA_GS_23 regSPI_SHADER_USER_DATA_GS_23; -typedef union SPI_SHADER_USER_DATA_GS_24 regSPI_SHADER_USER_DATA_GS_24; -typedef union SPI_SHADER_USER_DATA_GS_25 regSPI_SHADER_USER_DATA_GS_25; -typedef union SPI_SHADER_USER_DATA_GS_26 regSPI_SHADER_USER_DATA_GS_26; -typedef union 
SPI_SHADER_USER_DATA_GS_27 regSPI_SHADER_USER_DATA_GS_27; -typedef union SPI_SHADER_USER_DATA_GS_28 regSPI_SHADER_USER_DATA_GS_28; -typedef union SPI_SHADER_USER_DATA_GS_29 regSPI_SHADER_USER_DATA_GS_29; -typedef union SPI_SHADER_USER_DATA_GS_30 regSPI_SHADER_USER_DATA_GS_30; -typedef union SPI_SHADER_USER_DATA_GS_31 regSPI_SHADER_USER_DATA_GS_31; -typedef union SPI_SHADER_USER_DATA_HS_0 regSPI_SHADER_USER_DATA_HS_0; -typedef union SPI_SHADER_USER_DATA_HS_1 regSPI_SHADER_USER_DATA_HS_1; -typedef union SPI_SHADER_USER_DATA_HS_2 regSPI_SHADER_USER_DATA_HS_2; -typedef union SPI_SHADER_USER_DATA_HS_3 regSPI_SHADER_USER_DATA_HS_3; -typedef union SPI_SHADER_USER_DATA_HS_4 regSPI_SHADER_USER_DATA_HS_4; -typedef union SPI_SHADER_USER_DATA_HS_5 regSPI_SHADER_USER_DATA_HS_5; -typedef union SPI_SHADER_USER_DATA_HS_6 regSPI_SHADER_USER_DATA_HS_6; -typedef union SPI_SHADER_USER_DATA_HS_7 regSPI_SHADER_USER_DATA_HS_7; -typedef union SPI_SHADER_USER_DATA_HS_8 regSPI_SHADER_USER_DATA_HS_8; -typedef union SPI_SHADER_USER_DATA_HS_9 regSPI_SHADER_USER_DATA_HS_9; -typedef union SPI_SHADER_USER_DATA_HS_10 regSPI_SHADER_USER_DATA_HS_10; -typedef union SPI_SHADER_USER_DATA_HS_11 regSPI_SHADER_USER_DATA_HS_11; -typedef union SPI_SHADER_USER_DATA_HS_12 regSPI_SHADER_USER_DATA_HS_12; -typedef union SPI_SHADER_USER_DATA_HS_13 regSPI_SHADER_USER_DATA_HS_13; -typedef union SPI_SHADER_USER_DATA_HS_14 regSPI_SHADER_USER_DATA_HS_14; -typedef union SPI_SHADER_USER_DATA_HS_15 regSPI_SHADER_USER_DATA_HS_15; -typedef union SPI_SHADER_USER_DATA_HS_16 regSPI_SHADER_USER_DATA_HS_16; -typedef union SPI_SHADER_USER_DATA_HS_17 regSPI_SHADER_USER_DATA_HS_17; -typedef union SPI_SHADER_USER_DATA_HS_18 regSPI_SHADER_USER_DATA_HS_18; -typedef union SPI_SHADER_USER_DATA_HS_19 regSPI_SHADER_USER_DATA_HS_19; -typedef union SPI_SHADER_USER_DATA_HS_20 regSPI_SHADER_USER_DATA_HS_20; -typedef union SPI_SHADER_USER_DATA_HS_21 regSPI_SHADER_USER_DATA_HS_21; -typedef union SPI_SHADER_USER_DATA_HS_22 
regSPI_SHADER_USER_DATA_HS_22; -typedef union SPI_SHADER_USER_DATA_HS_23 regSPI_SHADER_USER_DATA_HS_23; -typedef union SPI_SHADER_USER_DATA_HS_24 regSPI_SHADER_USER_DATA_HS_24; -typedef union SPI_SHADER_USER_DATA_HS_25 regSPI_SHADER_USER_DATA_HS_25; -typedef union SPI_SHADER_USER_DATA_HS_26 regSPI_SHADER_USER_DATA_HS_26; -typedef union SPI_SHADER_USER_DATA_HS_27 regSPI_SHADER_USER_DATA_HS_27; -typedef union SPI_SHADER_USER_DATA_HS_28 regSPI_SHADER_USER_DATA_HS_28; -typedef union SPI_SHADER_USER_DATA_HS_29 regSPI_SHADER_USER_DATA_HS_29; -typedef union SPI_SHADER_USER_DATA_HS_30 regSPI_SHADER_USER_DATA_HS_30; -typedef union SPI_SHADER_USER_DATA_HS_31 regSPI_SHADER_USER_DATA_HS_31; -typedef union SPI_SHADER_USER_DATA_LS_0 regSPI_SHADER_USER_DATA_LS_0; -typedef union SPI_SHADER_USER_DATA_LS_1 regSPI_SHADER_USER_DATA_LS_1; -typedef union SPI_SHADER_USER_DATA_LS_2 regSPI_SHADER_USER_DATA_LS_2; -typedef union SPI_SHADER_USER_DATA_LS_3 regSPI_SHADER_USER_DATA_LS_3; -typedef union SPI_SHADER_USER_DATA_LS_4 regSPI_SHADER_USER_DATA_LS_4; -typedef union SPI_SHADER_USER_DATA_LS_5 regSPI_SHADER_USER_DATA_LS_5; -typedef union SPI_SHADER_USER_DATA_LS_6 regSPI_SHADER_USER_DATA_LS_6; -typedef union SPI_SHADER_USER_DATA_LS_7 regSPI_SHADER_USER_DATA_LS_7; -typedef union SPI_SHADER_USER_DATA_LS_8 regSPI_SHADER_USER_DATA_LS_8; -typedef union SPI_SHADER_USER_DATA_LS_9 regSPI_SHADER_USER_DATA_LS_9; -typedef union SPI_SHADER_USER_DATA_LS_10 regSPI_SHADER_USER_DATA_LS_10; -typedef union SPI_SHADER_USER_DATA_LS_11 regSPI_SHADER_USER_DATA_LS_11; -typedef union SPI_SHADER_USER_DATA_LS_12 regSPI_SHADER_USER_DATA_LS_12; -typedef union SPI_SHADER_USER_DATA_LS_13 regSPI_SHADER_USER_DATA_LS_13; -typedef union SPI_SHADER_USER_DATA_LS_14 regSPI_SHADER_USER_DATA_LS_14; -typedef union SPI_SHADER_USER_DATA_LS_15 regSPI_SHADER_USER_DATA_LS_15; -typedef union SPI_SHADER_USER_DATA_LS_16 regSPI_SHADER_USER_DATA_LS_16; -typedef union SPI_SHADER_USER_DATA_LS_17 regSPI_SHADER_USER_DATA_LS_17; -typedef union 
SPI_SHADER_USER_DATA_LS_18 regSPI_SHADER_USER_DATA_LS_18; -typedef union SPI_SHADER_USER_DATA_LS_19 regSPI_SHADER_USER_DATA_LS_19; -typedef union SPI_SHADER_USER_DATA_LS_20 regSPI_SHADER_USER_DATA_LS_20; -typedef union SPI_SHADER_USER_DATA_LS_21 regSPI_SHADER_USER_DATA_LS_21; -typedef union SPI_SHADER_USER_DATA_LS_22 regSPI_SHADER_USER_DATA_LS_22; -typedef union SPI_SHADER_USER_DATA_LS_23 regSPI_SHADER_USER_DATA_LS_23; -typedef union SPI_SHADER_USER_DATA_LS_24 regSPI_SHADER_USER_DATA_LS_24; -typedef union SPI_SHADER_USER_DATA_LS_25 regSPI_SHADER_USER_DATA_LS_25; -typedef union SPI_SHADER_USER_DATA_LS_26 regSPI_SHADER_USER_DATA_LS_26; -typedef union SPI_SHADER_USER_DATA_LS_27 regSPI_SHADER_USER_DATA_LS_27; -typedef union SPI_SHADER_USER_DATA_LS_28 regSPI_SHADER_USER_DATA_LS_28; -typedef union SPI_SHADER_USER_DATA_LS_29 regSPI_SHADER_USER_DATA_LS_29; -typedef union SPI_SHADER_USER_DATA_LS_30 regSPI_SHADER_USER_DATA_LS_30; -typedef union SPI_SHADER_USER_DATA_LS_31 regSPI_SHADER_USER_DATA_LS_31; -typedef union SPI_SHADER_USER_DATA_PS_0 regSPI_SHADER_USER_DATA_PS_0; -typedef union SPI_SHADER_USER_DATA_PS_1 regSPI_SHADER_USER_DATA_PS_1; -typedef union SPI_SHADER_USER_DATA_PS_2 regSPI_SHADER_USER_DATA_PS_2; -typedef union SPI_SHADER_USER_DATA_PS_3 regSPI_SHADER_USER_DATA_PS_3; -typedef union SPI_SHADER_USER_DATA_PS_4 regSPI_SHADER_USER_DATA_PS_4; -typedef union SPI_SHADER_USER_DATA_PS_5 regSPI_SHADER_USER_DATA_PS_5; -typedef union SPI_SHADER_USER_DATA_PS_6 regSPI_SHADER_USER_DATA_PS_6; -typedef union SPI_SHADER_USER_DATA_PS_7 regSPI_SHADER_USER_DATA_PS_7; -typedef union SPI_SHADER_USER_DATA_PS_8 regSPI_SHADER_USER_DATA_PS_8; -typedef union SPI_SHADER_USER_DATA_PS_9 regSPI_SHADER_USER_DATA_PS_9; -typedef union SPI_SHADER_USER_DATA_PS_10 regSPI_SHADER_USER_DATA_PS_10; -typedef union SPI_SHADER_USER_DATA_PS_11 regSPI_SHADER_USER_DATA_PS_11; -typedef union SPI_SHADER_USER_DATA_PS_12 regSPI_SHADER_USER_DATA_PS_12; -typedef union SPI_SHADER_USER_DATA_PS_13 
regSPI_SHADER_USER_DATA_PS_13; -typedef union SPI_SHADER_USER_DATA_PS_14 regSPI_SHADER_USER_DATA_PS_14; -typedef union SPI_SHADER_USER_DATA_PS_15 regSPI_SHADER_USER_DATA_PS_15; -typedef union SPI_SHADER_USER_DATA_PS_16 regSPI_SHADER_USER_DATA_PS_16; -typedef union SPI_SHADER_USER_DATA_PS_17 regSPI_SHADER_USER_DATA_PS_17; -typedef union SPI_SHADER_USER_DATA_PS_18 regSPI_SHADER_USER_DATA_PS_18; -typedef union SPI_SHADER_USER_DATA_PS_19 regSPI_SHADER_USER_DATA_PS_19; -typedef union SPI_SHADER_USER_DATA_PS_20 regSPI_SHADER_USER_DATA_PS_20; -typedef union SPI_SHADER_USER_DATA_PS_21 regSPI_SHADER_USER_DATA_PS_21; -typedef union SPI_SHADER_USER_DATA_PS_22 regSPI_SHADER_USER_DATA_PS_22; -typedef union SPI_SHADER_USER_DATA_PS_23 regSPI_SHADER_USER_DATA_PS_23; -typedef union SPI_SHADER_USER_DATA_PS_24 regSPI_SHADER_USER_DATA_PS_24; -typedef union SPI_SHADER_USER_DATA_PS_25 regSPI_SHADER_USER_DATA_PS_25; -typedef union SPI_SHADER_USER_DATA_PS_26 regSPI_SHADER_USER_DATA_PS_26; -typedef union SPI_SHADER_USER_DATA_PS_27 regSPI_SHADER_USER_DATA_PS_27; -typedef union SPI_SHADER_USER_DATA_PS_28 regSPI_SHADER_USER_DATA_PS_28; -typedef union SPI_SHADER_USER_DATA_PS_29 regSPI_SHADER_USER_DATA_PS_29; -typedef union SPI_SHADER_USER_DATA_PS_30 regSPI_SHADER_USER_DATA_PS_30; -typedef union SPI_SHADER_USER_DATA_PS_31 regSPI_SHADER_USER_DATA_PS_31; -typedef union SPI_SHADER_USER_DATA_VS_0 regSPI_SHADER_USER_DATA_VS_0; -typedef union SPI_SHADER_USER_DATA_VS_1 regSPI_SHADER_USER_DATA_VS_1; -typedef union SPI_SHADER_USER_DATA_VS_2 regSPI_SHADER_USER_DATA_VS_2; -typedef union SPI_SHADER_USER_DATA_VS_3 regSPI_SHADER_USER_DATA_VS_3; -typedef union SPI_SHADER_USER_DATA_VS_4 regSPI_SHADER_USER_DATA_VS_4; -typedef union SPI_SHADER_USER_DATA_VS_5 regSPI_SHADER_USER_DATA_VS_5; -typedef union SPI_SHADER_USER_DATA_VS_6 regSPI_SHADER_USER_DATA_VS_6; -typedef union SPI_SHADER_USER_DATA_VS_7 regSPI_SHADER_USER_DATA_VS_7; -typedef union SPI_SHADER_USER_DATA_VS_8 regSPI_SHADER_USER_DATA_VS_8; -typedef union 
SPI_SHADER_USER_DATA_VS_9 regSPI_SHADER_USER_DATA_VS_9; -typedef union SPI_SHADER_USER_DATA_VS_10 regSPI_SHADER_USER_DATA_VS_10; -typedef union SPI_SHADER_USER_DATA_VS_11 regSPI_SHADER_USER_DATA_VS_11; -typedef union SPI_SHADER_USER_DATA_VS_12 regSPI_SHADER_USER_DATA_VS_12; -typedef union SPI_SHADER_USER_DATA_VS_13 regSPI_SHADER_USER_DATA_VS_13; -typedef union SPI_SHADER_USER_DATA_VS_14 regSPI_SHADER_USER_DATA_VS_14; -typedef union SPI_SHADER_USER_DATA_VS_15 regSPI_SHADER_USER_DATA_VS_15; -typedef union SPI_SHADER_USER_DATA_VS_16 regSPI_SHADER_USER_DATA_VS_16; -typedef union SPI_SHADER_USER_DATA_VS_17 regSPI_SHADER_USER_DATA_VS_17; -typedef union SPI_SHADER_USER_DATA_VS_18 regSPI_SHADER_USER_DATA_VS_18; -typedef union SPI_SHADER_USER_DATA_VS_19 regSPI_SHADER_USER_DATA_VS_19; -typedef union SPI_SHADER_USER_DATA_VS_20 regSPI_SHADER_USER_DATA_VS_20; -typedef union SPI_SHADER_USER_DATA_VS_21 regSPI_SHADER_USER_DATA_VS_21; -typedef union SPI_SHADER_USER_DATA_VS_22 regSPI_SHADER_USER_DATA_VS_22; -typedef union SPI_SHADER_USER_DATA_VS_23 regSPI_SHADER_USER_DATA_VS_23; -typedef union SPI_SHADER_USER_DATA_VS_24 regSPI_SHADER_USER_DATA_VS_24; -typedef union SPI_SHADER_USER_DATA_VS_25 regSPI_SHADER_USER_DATA_VS_25; -typedef union SPI_SHADER_USER_DATA_VS_26 regSPI_SHADER_USER_DATA_VS_26; -typedef union SPI_SHADER_USER_DATA_VS_27 regSPI_SHADER_USER_DATA_VS_27; -typedef union SPI_SHADER_USER_DATA_VS_28 regSPI_SHADER_USER_DATA_VS_28; -typedef union SPI_SHADER_USER_DATA_VS_29 regSPI_SHADER_USER_DATA_VS_29; -typedef union SPI_SHADER_USER_DATA_VS_30 regSPI_SHADER_USER_DATA_VS_30; -typedef union SPI_SHADER_USER_DATA_VS_31 regSPI_SHADER_USER_DATA_VS_31; -typedef union SPI_SHADER_Z_FORMAT regSPI_SHADER_Z_FORMAT; -typedef union SPI_START_PHASE regSPI_START_PHASE; -typedef union SPI_SX_EXPORT_BUFFER_SIZES regSPI_SX_EXPORT_BUFFER_SIZES; -typedef union SPI_SX_SCOREBOARD_BUFFER_SIZES regSPI_SX_SCOREBOARD_BUFFER_SIZES; -typedef union SPI_SYS_COMPUTE regSPI_SYS_COMPUTE; -typedef union 
SPI_SYS_WIF_CNTL regSPI_SYS_WIF_CNTL; -typedef union SPI_TMPRING_SIZE regSPI_TMPRING_SIZE; -typedef union SPI_USER_ACCUM_VMID_CNTL regSPI_USER_ACCUM_VMID_CNTL; -typedef union SPI_VS_OUT_CONFIG regSPI_VS_OUT_CONFIG; -typedef union SPI_WAVE_LIMIT_CNTL regSPI_WAVE_LIMIT_CNTL; -typedef union SPI_WAVE_LIMIT_CNTL_REMAP regSPI_WAVE_LIMIT_CNTL_REMAP; -typedef union SPI_WCL_PIPE_PERCENT_CS0 regSPI_WCL_PIPE_PERCENT_CS0; -typedef union SPI_WCL_PIPE_PERCENT_CS1 regSPI_WCL_PIPE_PERCENT_CS1; -typedef union SPI_WCL_PIPE_PERCENT_CS2 regSPI_WCL_PIPE_PERCENT_CS2; -typedef union SPI_WCL_PIPE_PERCENT_CS3 regSPI_WCL_PIPE_PERCENT_CS3; -typedef union SPI_WCL_PIPE_PERCENT_CS4 regSPI_WCL_PIPE_PERCENT_CS4; -typedef union SPI_WCL_PIPE_PERCENT_CS5 regSPI_WCL_PIPE_PERCENT_CS5; -typedef union SPI_WCL_PIPE_PERCENT_CS6 regSPI_WCL_PIPE_PERCENT_CS6; -typedef union SPI_WCL_PIPE_PERCENT_CS7 regSPI_WCL_PIPE_PERCENT_CS7; -typedef union SPI_WCL_PIPE_PERCENT_GFX regSPI_WCL_PIPE_PERCENT_GFX; -typedef union SPI_WCL_PIPE_PERCENT_HP3D regSPI_WCL_PIPE_PERCENT_HP3D; -typedef union SPI_WF_ACTIVE_COUNT_GFX regSPI_WF_ACTIVE_COUNT_GFX; -typedef union SPI_WF_ACTIVE_COUNT_HPG regSPI_WF_ACTIVE_COUNT_HPG; -typedef union SPI_WF_LIFETIME_CNTL regSPI_WF_LIFETIME_CNTL; -typedef union SPI_WF_LIFETIME_LIMIT_0 regSPI_WF_LIFETIME_LIMIT_0; -typedef union SPI_WF_LIFETIME_LIMIT_1 regSPI_WF_LIFETIME_LIMIT_1; -typedef union SPI_WF_LIFETIME_LIMIT_2 regSPI_WF_LIFETIME_LIMIT_2; -typedef union SPI_WF_LIFETIME_LIMIT_3 regSPI_WF_LIFETIME_LIMIT_3; -typedef union SPI_WF_LIFETIME_LIMIT_4 regSPI_WF_LIFETIME_LIMIT_4; -typedef union SPI_WF_LIFETIME_LIMIT_5 regSPI_WF_LIFETIME_LIMIT_5; -typedef union SPI_WF_LIFETIME_LIMIT_6 regSPI_WF_LIFETIME_LIMIT_6; -typedef union SPI_WF_LIFETIME_LIMIT_7 regSPI_WF_LIFETIME_LIMIT_7; -typedef union SPI_WF_LIFETIME_LIMIT_8 regSPI_WF_LIFETIME_LIMIT_8; -typedef union SPI_WF_LIFETIME_LIMIT_9 regSPI_WF_LIFETIME_LIMIT_9; -typedef union SPI_WF_LIFETIME_STATUS_0 regSPI_WF_LIFETIME_STATUS_0; -typedef union 
SPI_WF_LIFETIME_STATUS_1 regSPI_WF_LIFETIME_STATUS_1; -typedef union SPI_WF_LIFETIME_STATUS_2 regSPI_WF_LIFETIME_STATUS_2; -typedef union SPI_WF_LIFETIME_STATUS_3 regSPI_WF_LIFETIME_STATUS_3; -typedef union SPI_WF_LIFETIME_STATUS_4 regSPI_WF_LIFETIME_STATUS_4; -typedef union SPI_WF_LIFETIME_STATUS_5 regSPI_WF_LIFETIME_STATUS_5; -typedef union SPI_WF_LIFETIME_STATUS_6 regSPI_WF_LIFETIME_STATUS_6; -typedef union SPI_WF_LIFETIME_STATUS_7 regSPI_WF_LIFETIME_STATUS_7; -typedef union SPI_WF_LIFETIME_STATUS_8 regSPI_WF_LIFETIME_STATUS_8; -typedef union SPI_WF_LIFETIME_STATUS_9 regSPI_WF_LIFETIME_STATUS_9; -typedef union SPI_WF_LIFETIME_STATUS_10 regSPI_WF_LIFETIME_STATUS_10; -typedef union SPI_WF_LIFETIME_STATUS_11 regSPI_WF_LIFETIME_STATUS_11; -typedef union SPI_WF_LIFETIME_STATUS_12 regSPI_WF_LIFETIME_STATUS_12; -typedef union SPI_WF_LIFETIME_STATUS_13 regSPI_WF_LIFETIME_STATUS_13; -typedef union SPI_WF_LIFETIME_STATUS_14 regSPI_WF_LIFETIME_STATUS_14; -typedef union SPI_WF_LIFETIME_STATUS_15 regSPI_WF_LIFETIME_STATUS_15; -typedef union SPI_WF_LIFETIME_STATUS_16 regSPI_WF_LIFETIME_STATUS_16; -typedef union SPI_WF_LIFETIME_STATUS_17 regSPI_WF_LIFETIME_STATUS_17; -typedef union SPI_WF_LIFETIME_STATUS_18 regSPI_WF_LIFETIME_STATUS_18; -typedef union SPI_WF_LIFETIME_STATUS_19 regSPI_WF_LIFETIME_STATUS_19; -typedef union SPI_WF_LIFETIME_STATUS_20 regSPI_WF_LIFETIME_STATUS_20; -typedef union SPI_WF_LIFETIME_STATUS_21 regSPI_WF_LIFETIME_STATUS_21; -typedef union SQG_CONFIG regSQG_CONFIG; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SQG_GL1H_STATUS regSQG_GL1H_STATUS; -typedef union SQG_PERFCOUNTER0_HI regSQG_PERFCOUNTER0_HI; -typedef union SQG_PERFCOUNTER0_LO regSQG_PERFCOUNTER0_LO; -typedef union SQG_PERFCOUNTER0_SELECT regSQG_PERFCOUNTER0_SELECT; -typedef union SQG_PERFCOUNTER1_HI regSQG_PERFCOUNTER1_HI; -typedef union SQG_PERFCOUNTER1_LO regSQG_PERFCOUNTER1_LO; -typedef union SQG_PERFCOUNTER1_SELECT regSQG_PERFCOUNTER1_SELECT; 
-typedef union SQG_PERFCOUNTER2_HI regSQG_PERFCOUNTER2_HI; -typedef union SQG_PERFCOUNTER2_LO regSQG_PERFCOUNTER2_LO; -typedef union SQG_PERFCOUNTER2_SELECT regSQG_PERFCOUNTER2_SELECT; -typedef union SQG_PERFCOUNTER3_HI regSQG_PERFCOUNTER3_HI; -typedef union SQG_PERFCOUNTER3_LO regSQG_PERFCOUNTER3_LO; -typedef union SQG_PERFCOUNTER3_SELECT regSQG_PERFCOUNTER3_SELECT; -typedef union SQG_PERFCOUNTER4_HI regSQG_PERFCOUNTER4_HI; -typedef union SQG_PERFCOUNTER4_LO regSQG_PERFCOUNTER4_LO; -typedef union SQG_PERFCOUNTER4_SELECT regSQG_PERFCOUNTER4_SELECT; -typedef union SQG_PERFCOUNTER5_HI regSQG_PERFCOUNTER5_HI; -typedef union SQG_PERFCOUNTER5_LO regSQG_PERFCOUNTER5_LO; -typedef union SQG_PERFCOUNTER5_SELECT regSQG_PERFCOUNTER5_SELECT; -typedef union SQG_PERFCOUNTER6_HI regSQG_PERFCOUNTER6_HI; -typedef union SQG_PERFCOUNTER6_LO regSQG_PERFCOUNTER6_LO; -typedef union SQG_PERFCOUNTER6_SELECT regSQG_PERFCOUNTER6_SELECT; -typedef union SQG_PERFCOUNTER7_HI regSQG_PERFCOUNTER7_HI; -typedef union SQG_PERFCOUNTER7_LO regSQG_PERFCOUNTER7_LO; -typedef union SQG_PERFCOUNTER7_SELECT regSQG_PERFCOUNTER7_SELECT; -typedef union SQG_PERFCOUNTER_CTRL regSQG_PERFCOUNTER_CTRL; -typedef union SQG_PERFCOUNTER_CTRL2 regSQG_PERFCOUNTER_CTRL2; -typedef union SQG_PERF_SAMPLE_FINISH regSQG_PERF_SAMPLE_FINISH; -#endif -typedef union SQG_STATUS regSQG_STATUS; -typedef union SQG_UTCL0_CNTL1 regSQG_UTCL0_CNTL1; -typedef union SQG_UTCL0_CNTL2 regSQG_UTCL0_CNTL2; -typedef union SQG_UTCL0_STATUS regSQG_UTCL0_STATUS; -typedef union SQ_ALU_CLK_CTRL regSQ_ALU_CLK_CTRL; -typedef union SQ_ARB_CONFIG regSQ_ARB_CONFIG; -typedef union SQ_BUF_RSRC_WORD0 regSQ_BUF_RSRC_WORD0; -typedef union SQ_BUF_RSRC_WORD1 regSQ_BUF_RSRC_WORD1; -typedef union SQ_BUF_RSRC_WORD2 regSQ_BUF_RSRC_WORD2; -typedef union SQ_BUF_RSRC_WORD3 regSQ_BUF_RSRC_WORD3; -typedef union SQ_CLK_CTRL regSQ_CLK_CTRL; -typedef union SQ_CMD regSQ_CMD; -typedef union SQ_CMD_TIMESTAMP regSQ_CMD_TIMESTAMP; -typedef union SQ_CONFIG regSQ_CONFIG; -typedef 
union SQ_DSM_CNTL regSQ_DSM_CNTL; -typedef union SQ_DSM_CNTL2 regSQ_DSM_CNTL2; -typedef union SQ_DS_0 regSQ_DS_0; -typedef union SQ_DS_1 regSQ_DS_1; -typedef union SQ_EDC_CNT regSQ_EDC_CNT; -typedef union SQ_EDC_DED_CNT regSQ_EDC_DED_CNT; -typedef union SQ_EDC_FUE_CNTL regSQ_EDC_FUE_CNTL; -typedef union SQ_EDC_INFO regSQ_EDC_INFO; -typedef union SQ_EDC_SEC_CNT regSQ_EDC_SEC_CNT; -typedef union SQ_EXP_0 regSQ_EXP_0; -typedef union SQ_EXP_1 regSQ_EXP_1; -typedef union SQ_FIFO_SIZES regSQ_FIFO_SIZES; -typedef union SQ_FLAT_0 regSQ_FLAT_0; -typedef union SQ_FLAT_1 regSQ_FLAT_1; -typedef union SQ_FLAT_SCRATCH_WORD0 regSQ_FLAT_SCRATCH_WORD0; -typedef union SQ_FLAT_SCRATCH_WORD1 regSQ_FLAT_SCRATCH_WORD1; -typedef union SQ_GLBL_0 regSQ_GLBL_0; -typedef union SQ_GLBL_1 regSQ_GLBL_1; -typedef union SQ_IMG_RSRC_WORD0 regSQ_IMG_RSRC_WORD0; -typedef union SQ_IMG_RSRC_WORD1 regSQ_IMG_RSRC_WORD1; -typedef union SQ_IMG_RSRC_WORD2 regSQ_IMG_RSRC_WORD2; -typedef union SQ_IMG_RSRC_WORD3 regSQ_IMG_RSRC_WORD3; -typedef union SQ_IMG_RSRC_WORD4 regSQ_IMG_RSRC_WORD4; -typedef union SQ_IMG_RSRC_WORD5 regSQ_IMG_RSRC_WORD5; -typedef union SQ_IMG_RSRC_WORD6 regSQ_IMG_RSRC_WORD6; -typedef union SQ_IMG_RSRC_WORD7 regSQ_IMG_RSRC_WORD7; -typedef union SQ_IMG_SAMP_WORD0 regSQ_IMG_SAMP_WORD0; -typedef union SQ_IMG_SAMP_WORD1 regSQ_IMG_SAMP_WORD1; -typedef union SQ_IMG_SAMP_WORD2 regSQ_IMG_SAMP_WORD2; -typedef union SQ_IMG_SAMP_WORD3 regSQ_IMG_SAMP_WORD3; -typedef union SQ_IND_DATA regSQ_IND_DATA; -typedef union SQ_IND_INDEX regSQ_IND_INDEX; -typedef union SQ_INST regSQ_INST; -typedef union SQ_INTERRUPT_AUTO_MASK regSQ_INTERRUPT_AUTO_MASK; -typedef union SQ_INTERRUPT_MSG_CTRL regSQ_INTERRUPT_MSG_CTRL; -typedef union SQ_INTERRUPT_WORD_AUTO_CTXID regSQ_INTERRUPT_WORD_AUTO_CTXID; -typedef union SQ_INTERRUPT_WORD_AUTO_HI regSQ_INTERRUPT_WORD_AUTO_HI; -typedef union SQ_INTERRUPT_WORD_AUTO_LO regSQ_INTERRUPT_WORD_AUTO_LO; -typedef union SQ_INTERRUPT_WORD_CMN_CTXID regSQ_INTERRUPT_WORD_CMN_CTXID; -typedef 
union SQ_INTERRUPT_WORD_CMN_HI regSQ_INTERRUPT_WORD_CMN_HI; -typedef union SQ_INTERRUPT_WORD_WAVE_CTXID regSQ_INTERRUPT_WORD_WAVE_CTXID; -typedef union SQ_INTERRUPT_WORD_WAVE_HI regSQ_INTERRUPT_WORD_WAVE_HI; -typedef union SQ_INTERRUPT_WORD_WAVE_LO regSQ_INTERRUPT_WORD_WAVE_LO; -typedef union SQ_LB_CTR0_CU regSQ_LB_CTR0_CU; -typedef union SQ_LB_CTR1_CU regSQ_LB_CTR1_CU; -typedef union SQ_LB_CTR2_CU regSQ_LB_CTR2_CU; -typedef union SQ_LB_CTR3_CU regSQ_LB_CTR3_CU; -typedef union SQ_LB_CTR_CTRL regSQ_LB_CTR_CTRL; -typedef union SQ_LB_CTR_SEL regSQ_LB_CTR_SEL; -typedef union SQ_LB_CTR_SEL0 regSQ_LB_CTR_SEL0; -typedef union SQ_LB_CTR_SEL1 regSQ_LB_CTR_SEL1; -typedef union SQ_LB_DATA0 regSQ_LB_DATA0; -typedef union SQ_LB_DATA1 regSQ_LB_DATA1; -typedef union SQ_LB_DATA2 regSQ_LB_DATA2; -typedef union SQ_LB_DATA3 regSQ_LB_DATA3; -typedef union SQ_LDS_CLK_CTRL regSQ_LDS_CLK_CTRL; -typedef union SQ_M0_GPR_IDX_WORD regSQ_M0_GPR_IDX_WORD; -typedef union SQ_MIMG_0 regSQ_MIMG_0; -typedef union SQ_MIMG_1 regSQ_MIMG_1; -typedef union SQ_MTBUF_0 regSQ_MTBUF_0; -typedef union SQ_MTBUF_1 regSQ_MTBUF_1; -typedef union SQ_MUBUF_0 regSQ_MUBUF_0; -typedef union SQ_MUBUF_1 regSQ_MUBUF_1; -typedef union SQ_PERFCOUNTER0_HI regSQ_PERFCOUNTER0_HI; -typedef union SQ_PERFCOUNTER0_LO regSQ_PERFCOUNTER0_LO; -typedef union SQ_PERFCOUNTER0_SELECT regSQ_PERFCOUNTER0_SELECT; -typedef union SQ_PERFCOUNTER1_HI regSQ_PERFCOUNTER1_HI; -typedef union SQ_PERFCOUNTER1_LO regSQ_PERFCOUNTER1_LO; -typedef union SQ_PERFCOUNTER1_SELECT regSQ_PERFCOUNTER1_SELECT; -typedef union SQ_PERFCOUNTER2_HI regSQ_PERFCOUNTER2_HI; -typedef union SQ_PERFCOUNTER2_LO regSQ_PERFCOUNTER2_LO; -typedef union SQ_PERFCOUNTER2_SELECT regSQ_PERFCOUNTER2_SELECT; -typedef union SQ_PERFCOUNTER3_HI regSQ_PERFCOUNTER3_HI; -typedef union SQ_PERFCOUNTER3_LO regSQ_PERFCOUNTER3_LO; -typedef union SQ_PERFCOUNTER3_SELECT regSQ_PERFCOUNTER3_SELECT; -typedef union SQ_PERFCOUNTER4_HI regSQ_PERFCOUNTER4_HI; -typedef union SQ_PERFCOUNTER4_LO 
regSQ_PERFCOUNTER4_LO; -typedef union SQ_PERFCOUNTER4_SELECT regSQ_PERFCOUNTER4_SELECT; -typedef union SQ_PERFCOUNTER5_HI regSQ_PERFCOUNTER5_HI; -typedef union SQ_PERFCOUNTER5_LO regSQ_PERFCOUNTER5_LO; -typedef union SQ_PERFCOUNTER5_SELECT regSQ_PERFCOUNTER5_SELECT; -typedef union SQ_PERFCOUNTER6_HI regSQ_PERFCOUNTER6_HI; -typedef union SQ_PERFCOUNTER6_LO regSQ_PERFCOUNTER6_LO; -typedef union SQ_PERFCOUNTER6_SELECT regSQ_PERFCOUNTER6_SELECT; -typedef union SQ_PERFCOUNTER7_HI regSQ_PERFCOUNTER7_HI; -typedef union SQ_PERFCOUNTER7_LO regSQ_PERFCOUNTER7_LO; -typedef union SQ_PERFCOUNTER7_SELECT regSQ_PERFCOUNTER7_SELECT; -typedef union SQ_PERFCOUNTER8_HI regSQ_PERFCOUNTER8_HI; -typedef union SQ_PERFCOUNTER8_LO regSQ_PERFCOUNTER8_LO; -typedef union SQ_PERFCOUNTER8_SELECT regSQ_PERFCOUNTER8_SELECT; -typedef union SQ_PERFCOUNTER9_HI regSQ_PERFCOUNTER9_HI; -typedef union SQ_PERFCOUNTER9_LO regSQ_PERFCOUNTER9_LO; -typedef union SQ_PERFCOUNTER9_SELECT regSQ_PERFCOUNTER9_SELECT; -typedef union SQ_PERFCOUNTER10_HI regSQ_PERFCOUNTER10_HI; -typedef union SQ_PERFCOUNTER10_LO regSQ_PERFCOUNTER10_LO; -typedef union SQ_PERFCOUNTER10_SELECT regSQ_PERFCOUNTER10_SELECT; -typedef union SQ_PERFCOUNTER11_HI regSQ_PERFCOUNTER11_HI; -typedef union SQ_PERFCOUNTER11_LO regSQ_PERFCOUNTER11_LO; -typedef union SQ_PERFCOUNTER11_SELECT regSQ_PERFCOUNTER11_SELECT; -typedef union SQ_PERFCOUNTER12_HI regSQ_PERFCOUNTER12_HI; -typedef union SQ_PERFCOUNTER12_LO regSQ_PERFCOUNTER12_LO; -typedef union SQ_PERFCOUNTER12_SELECT regSQ_PERFCOUNTER12_SELECT; -typedef union SQ_PERFCOUNTER13_HI regSQ_PERFCOUNTER13_HI; -typedef union SQ_PERFCOUNTER13_LO regSQ_PERFCOUNTER13_LO; -typedef union SQ_PERFCOUNTER13_SELECT regSQ_PERFCOUNTER13_SELECT; -typedef union SQ_PERFCOUNTER14_HI regSQ_PERFCOUNTER14_HI; -typedef union SQ_PERFCOUNTER14_LO regSQ_PERFCOUNTER14_LO; -typedef union SQ_PERFCOUNTER14_SELECT regSQ_PERFCOUNTER14_SELECT; -typedef union SQ_PERFCOUNTER15_HI regSQ_PERFCOUNTER15_HI; -typedef union 
SQ_PERFCOUNTER15_LO regSQ_PERFCOUNTER15_LO; -typedef union SQ_PERFCOUNTER15_SELECT regSQ_PERFCOUNTER15_SELECT; -typedef union SQ_PERFCOUNTER_CTRL regSQ_PERFCOUNTER_CTRL; -typedef union SQ_PERFCOUNTER_CTRL2 regSQ_PERFCOUNTER_CTRL2; -typedef union SQ_PERFCOUNTER_MASK regSQ_PERFCOUNTER_MASK; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union SQ_PERF_SNAPSHOT_CTRL regSQ_PERF_SNAPSHOT_CTRL; -typedef union SQ_PERF_SNAPSHOT_DATA regSQ_PERF_SNAPSHOT_DATA; -typedef union SQ_PERF_SNAPSHOT_PC_HI regSQ_PERF_SNAPSHOT_PC_HI; -typedef union SQ_PERF_SNAPSHOT_PC_LO regSQ_PERF_SNAPSHOT_PC_LO; -#endif -typedef union SQ_POWER_THROTTLE regSQ_POWER_THROTTLE; -typedef union SQ_POWER_THROTTLE2 regSQ_POWER_THROTTLE2; -typedef union SQ_RANDOM_WAVE_PRI regSQ_RANDOM_WAVE_PRI; -typedef union SQ_REG_CREDITS regSQ_REG_CREDITS; -typedef union SQ_REG_TIMESTAMP regSQ_REG_TIMESTAMP; -typedef union SQ_RUNTIME_CONFIG regSQ_RUNTIME_CONFIG; -typedef union SQ_SCRATCH_0 regSQ_SCRATCH_0; -typedef union SQ_SCRATCH_1 regSQ_SCRATCH_1; -typedef union SQ_SHADER_TBA_HI regSQ_SHADER_TBA_HI; -typedef union SQ_SHADER_TBA_LO regSQ_SHADER_TBA_LO; -typedef union SQ_SHADER_TMA_HI regSQ_SHADER_TMA_HI; -typedef union SQ_SHADER_TMA_LO regSQ_SHADER_TMA_LO; -typedef union SQ_SMEM_0 regSQ_SMEM_0; -typedef union SQ_SMEM_1 regSQ_SMEM_1; -typedef union SQ_SOP1 regSQ_SOP1; -typedef union SQ_SOP2 regSQ_SOP2; -typedef union SQ_SOPC regSQ_SOPC; -typedef union SQ_SOPK regSQ_SOPK; -typedef union SQ_SOPP regSQ_SOPP; -typedef union SQ_TEX_CLK_CTRL regSQ_TEX_CLK_CTRL; -typedef union SQ_THREAD_TRACE_BASE regSQ_THREAD_TRACE_BASE; -typedef union SQ_THREAD_TRACE_BASE2 regSQ_THREAD_TRACE_BASE2; -typedef union SQ_THREAD_TRACE_BUF0_BASE regSQ_THREAD_TRACE_BUF0_BASE; -typedef union SQ_THREAD_TRACE_BUF0_SIZE regSQ_THREAD_TRACE_BUF0_SIZE; -typedef union SQ_THREAD_TRACE_BUF1_BASE regSQ_THREAD_TRACE_BUF1_BASE; -typedef union SQ_THREAD_TRACE_BUF1_SIZE regSQ_THREAD_TRACE_BUF1_SIZE; -typedef union 
SQ_THREAD_TRACE_CNTR regSQ_THREAD_TRACE_CNTR; -typedef union SQ_THREAD_TRACE_CTRL regSQ_THREAD_TRACE_CTRL; -typedef union SQ_THREAD_TRACE_DROPPED_CNTR regSQ_THREAD_TRACE_DROPPED_CNTR; -typedef union SQ_THREAD_TRACE_GFX_DRAW_CNTR regSQ_THREAD_TRACE_GFX_DRAW_CNTR; -typedef union SQ_THREAD_TRACE_GFX_MARKER_CNTR regSQ_THREAD_TRACE_GFX_MARKER_CNTR; -typedef union SQ_THREAD_TRACE_HIWATER regSQ_THREAD_TRACE_HIWATER; -typedef union SQ_THREAD_TRACE_HP3D_DRAW_CNTR regSQ_THREAD_TRACE_HP3D_DRAW_CNTR; -typedef union SQ_THREAD_TRACE_HP3D_MARKER_CNTR regSQ_THREAD_TRACE_HP3D_MARKER_CNTR; -typedef union SQ_THREAD_TRACE_MASK regSQ_THREAD_TRACE_MASK; -typedef union SQ_THREAD_TRACE_MODE regSQ_THREAD_TRACE_MODE; -typedef union SQ_THREAD_TRACE_PERF_MASK regSQ_THREAD_TRACE_PERF_MASK; -typedef union SQ_THREAD_TRACE_SIZE regSQ_THREAD_TRACE_SIZE; -typedef union SQ_THREAD_TRACE_STATUS regSQ_THREAD_TRACE_STATUS; -typedef union SQ_THREAD_TRACE_STATUS2 regSQ_THREAD_TRACE_STATUS2; -typedef union SQ_THREAD_TRACE_TOKEN_MASK regSQ_THREAD_TRACE_TOKEN_MASK; -typedef union SQ_THREAD_TRACE_TOKEN_MASK2 regSQ_THREAD_TRACE_TOKEN_MASK2; -typedef union SQ_THREAD_TRACE_USERDATA_0 regSQ_THREAD_TRACE_USERDATA_0; -typedef union SQ_THREAD_TRACE_USERDATA_1 regSQ_THREAD_TRACE_USERDATA_1; -typedef union SQ_THREAD_TRACE_USERDATA_2 regSQ_THREAD_TRACE_USERDATA_2; -typedef union SQ_THREAD_TRACE_USERDATA_3 regSQ_THREAD_TRACE_USERDATA_3; -typedef union SQ_THREAD_TRACE_USERDATA_4 regSQ_THREAD_TRACE_USERDATA_4; -typedef union SQ_THREAD_TRACE_USERDATA_5 regSQ_THREAD_TRACE_USERDATA_5; -typedef union SQ_THREAD_TRACE_USERDATA_6 regSQ_THREAD_TRACE_USERDATA_6; -typedef union SQ_THREAD_TRACE_USERDATA_7 regSQ_THREAD_TRACE_USERDATA_7; -typedef union SQ_THREAD_TRACE_WORD_CMN regSQ_THREAD_TRACE_WORD_CMN; -typedef union SQ_THREAD_TRACE_WORD_EVENT regSQ_THREAD_TRACE_WORD_EVENT; -typedef union SQ_THREAD_TRACE_WORD_INST regSQ_THREAD_TRACE_WORD_INST; -typedef union SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2 
regSQ_THREAD_TRACE_WORD_INST_PC_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_INST_PC_2_OF_2 regSQ_THREAD_TRACE_WORD_INST_PC_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2 regSQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2 regSQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_ISSUE regSQ_THREAD_TRACE_WORD_ISSUE; -typedef union SQ_THREAD_TRACE_WORD_MISC regSQ_THREAD_TRACE_WORD_MISC; -typedef union SQ_THREAD_TRACE_WORD_PERF_1_OF_2 regSQ_THREAD_TRACE_WORD_PERF_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_PERF_2_OF_2 regSQ_THREAD_TRACE_WORD_PERF_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_REG_1_OF_2 regSQ_THREAD_TRACE_WORD_REG_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_REG_2_OF_2 regSQ_THREAD_TRACE_WORD_REG_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2 regSQ_THREAD_TRACE_WORD_REG_CS_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_REG_CS_2_OF_2 regSQ_THREAD_TRACE_WORD_REG_CS_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2 regSQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2; -typedef union SQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2 regSQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2; -typedef union SQ_THREAD_TRACE_WORD_WAVE regSQ_THREAD_TRACE_WORD_WAVE; -typedef union SQ_THREAD_TRACE_WORD_WAVE_START regSQ_THREAD_TRACE_WORD_WAVE_START; -typedef union SQ_THREAD_TRACE_WPTR regSQ_THREAD_TRACE_WPTR; -typedef union SQ_TIME_HI regSQ_TIME_HI; -typedef union SQ_TIME_LO regSQ_TIME_LO; -typedef union SQ_UTCL1_CNTL1 regSQ_UTCL1_CNTL1; -typedef union SQ_UTCL1_CNTL2 regSQ_UTCL1_CNTL2; -typedef union SQ_UTCL1_STATUS regSQ_UTCL1_STATUS; -typedef union SQ_VINTRP regSQ_VINTRP; -typedef union SQ_VOP1 regSQ_VOP1; -typedef union SQ_VOP2 regSQ_VOP2; -typedef union SQ_VOP3P_0 regSQ_VOP3P_0; -typedef union SQ_VOP3P_1 regSQ_VOP3P_1; -typedef union SQ_VOP3_0 regSQ_VOP3_0; -typedef union SQ_VOP3_0_SDST_ENC regSQ_VOP3_0_SDST_ENC; -typedef union SQ_VOP3_1 regSQ_VOP3_1; -typedef union SQ_VOPC regSQ_VOPC; 
-typedef union SQ_VOP_DPP regSQ_VOP_DPP; -typedef union SQ_VOP_SDWA regSQ_VOP_SDWA; -typedef union SQ_VOP_SDWA_SDST_ENC regSQ_VOP_SDWA_SDST_ENC; -typedef union SQ_WATCH0_ADDR_H regSQ_WATCH0_ADDR_H; -typedef union SQ_WATCH0_ADDR_L regSQ_WATCH0_ADDR_L; -typedef union SQ_WATCH0_CNTL regSQ_WATCH0_CNTL; -typedef union SQ_WATCH1_ADDR_H regSQ_WATCH1_ADDR_H; -typedef union SQ_WATCH1_ADDR_L regSQ_WATCH1_ADDR_L; -typedef union SQ_WATCH1_CNTL regSQ_WATCH1_CNTL; -typedef union SQ_WATCH2_ADDR_H regSQ_WATCH2_ADDR_H; -typedef union SQ_WATCH2_ADDR_L regSQ_WATCH2_ADDR_L; -typedef union SQ_WATCH2_CNTL regSQ_WATCH2_CNTL; -typedef union SQ_WATCH3_ADDR_H regSQ_WATCH3_ADDR_H; -typedef union SQ_WATCH3_ADDR_L regSQ_WATCH3_ADDR_L; -typedef union SQ_WATCH3_CNTL regSQ_WATCH3_CNTL; -typedef union SQ_WREXEC_EXEC_HI regSQ_WREXEC_EXEC_HI; -typedef union SQ_WREXEC_EXEC_LO regSQ_WREXEC_EXEC_LO; -typedef union SX_BLEND_OPT_CONTROL regSX_BLEND_OPT_CONTROL; -typedef union SX_BLEND_OPT_EPSILON regSX_BLEND_OPT_EPSILON; -typedef union SX_MRT0_BLEND_OPT regSX_MRT0_BLEND_OPT; -typedef union SX_MRT1_BLEND_OPT regSX_MRT1_BLEND_OPT; -typedef union SX_MRT2_BLEND_OPT regSX_MRT2_BLEND_OPT; -typedef union SX_MRT3_BLEND_OPT regSX_MRT3_BLEND_OPT; -typedef union SX_MRT4_BLEND_OPT regSX_MRT4_BLEND_OPT; -typedef union SX_MRT5_BLEND_OPT regSX_MRT5_BLEND_OPT; -typedef union SX_MRT6_BLEND_OPT regSX_MRT6_BLEND_OPT; -typedef union SX_MRT7_BLEND_OPT regSX_MRT7_BLEND_OPT; -typedef union SX_PERFCOUNTER0_HI regSX_PERFCOUNTER0_HI; -typedef union SX_PERFCOUNTER0_LO regSX_PERFCOUNTER0_LO; -typedef union SX_PERFCOUNTER0_SELECT regSX_PERFCOUNTER0_SELECT; -typedef union SX_PERFCOUNTER0_SELECT1 regSX_PERFCOUNTER0_SELECT1; -typedef union SX_PERFCOUNTER1_HI regSX_PERFCOUNTER1_HI; -typedef union SX_PERFCOUNTER1_LO regSX_PERFCOUNTER1_LO; -typedef union SX_PERFCOUNTER1_SELECT regSX_PERFCOUNTER1_SELECT; -typedef union SX_PERFCOUNTER1_SELECT1 regSX_PERFCOUNTER1_SELECT1; -typedef union SX_PERFCOUNTER2_HI regSX_PERFCOUNTER2_HI; -typedef 
union SX_PERFCOUNTER2_LO regSX_PERFCOUNTER2_LO; -typedef union SX_PERFCOUNTER2_SELECT regSX_PERFCOUNTER2_SELECT; -typedef union SX_PERFCOUNTER3_HI regSX_PERFCOUNTER3_HI; -typedef union SX_PERFCOUNTER3_LO regSX_PERFCOUNTER3_LO; -typedef union SX_PERFCOUNTER3_SELECT regSX_PERFCOUNTER3_SELECT; -typedef union SX_PS_DOWNCONVERT regSX_PS_DOWNCONVERT; -typedef union SX_PS_DOWNCONVERT_CONTROL regSX_PS_DOWNCONVERT_CONTROL; -typedef union TA_BC_BASE_ADDR regTA_BC_BASE_ADDR; -typedef union TA_BC_BASE_ADDR_HI regTA_BC_BASE_ADDR_HI; -typedef union TA_CGTT_CTRL regTA_CGTT_CTRL; -typedef union TA_CNTL regTA_CNTL; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union TA_CNTL2 regTA_CNTL2; -#endif -typedef union TA_CNTL_AUX regTA_CNTL_AUX; -typedef union TA_CS_BC_BASE_ADDR regTA_CS_BC_BASE_ADDR; -typedef union TA_CS_BC_BASE_ADDR_HI regTA_CS_BC_BASE_ADDR_HI; -typedef union TA_DSM_CNTL regTA_DSM_CNTL; -typedef union TA_DSM_CNTL2 regTA_DSM_CNTL2; -typedef union TA_EDC_CNT regTA_EDC_CNT; -typedef union TA_PERFCOUNTER0_HI regTA_PERFCOUNTER0_HI; -typedef union TA_PERFCOUNTER0_LO regTA_PERFCOUNTER0_LO; -typedef union TA_PERFCOUNTER0_SELECT regTA_PERFCOUNTER0_SELECT; -typedef union TA_PERFCOUNTER0_SELECT1 regTA_PERFCOUNTER0_SELECT1; -typedef union TA_PERFCOUNTER1_HI regTA_PERFCOUNTER1_HI; -typedef union TA_PERFCOUNTER1_LO regTA_PERFCOUNTER1_LO; -typedef union TA_PERFCOUNTER1_SELECT regTA_PERFCOUNTER1_SELECT; -typedef union TA_POWER_CNTL regTA_POWER_CNTL; -typedef union TA_RESERVED_010C regTA_RESERVED_010C; -typedef union TA_SCRATCH regTA_SCRATCH; -typedef union TA_STATUS regTA_STATUS; -typedef union TCA_PERFCOUNTER0_HI regTCA_PERFCOUNTER0_HI; -typedef union TCA_PERFCOUNTER0_LO regTCA_PERFCOUNTER0_LO; -typedef union TCA_PERFCOUNTER0_SELECT regTCA_PERFCOUNTER0_SELECT; -typedef union TCA_PERFCOUNTER0_SELECT1 regTCA_PERFCOUNTER0_SELECT1; -typedef union TCA_PERFCOUNTER1_HI regTCA_PERFCOUNTER1_HI; -typedef union TCA_PERFCOUNTER1_LO regTCA_PERFCOUNTER1_LO; 
-typedef union TCA_PERFCOUNTER1_SELECT regTCA_PERFCOUNTER1_SELECT; -typedef union TCA_PERFCOUNTER1_SELECT1 regTCA_PERFCOUNTER1_SELECT1; -typedef union TCA_PERFCOUNTER2_HI regTCA_PERFCOUNTER2_HI; -typedef union TCA_PERFCOUNTER2_LO regTCA_PERFCOUNTER2_LO; -typedef union TCA_PERFCOUNTER2_SELECT regTCA_PERFCOUNTER2_SELECT; -typedef union TCA_PERFCOUNTER3_HI regTCA_PERFCOUNTER3_HI; -typedef union TCA_PERFCOUNTER3_LO regTCA_PERFCOUNTER3_LO; -typedef union TCA_PERFCOUNTER3_SELECT regTCA_PERFCOUNTER3_SELECT; -typedef union TCC_PERFCOUNTER0_HI regTCC_PERFCOUNTER0_HI; -typedef union TCC_PERFCOUNTER0_LO regTCC_PERFCOUNTER0_LO; -typedef union TCC_PERFCOUNTER0_SELECT regTCC_PERFCOUNTER0_SELECT; -typedef union TCC_PERFCOUNTER0_SELECT1 regTCC_PERFCOUNTER0_SELECT1; -typedef union TCC_PERFCOUNTER1_HI regTCC_PERFCOUNTER1_HI; -typedef union TCC_PERFCOUNTER1_LO regTCC_PERFCOUNTER1_LO; -typedef union TCC_PERFCOUNTER1_SELECT regTCC_PERFCOUNTER1_SELECT; -typedef union TCC_PERFCOUNTER1_SELECT1 regTCC_PERFCOUNTER1_SELECT1; -typedef union TCC_PERFCOUNTER2_HI regTCC_PERFCOUNTER2_HI; -typedef union TCC_PERFCOUNTER2_LO regTCC_PERFCOUNTER2_LO; -typedef union TCC_PERFCOUNTER2_SELECT regTCC_PERFCOUNTER2_SELECT; -typedef union TCC_PERFCOUNTER3_HI regTCC_PERFCOUNTER3_HI; -typedef union TCC_PERFCOUNTER3_LO regTCC_PERFCOUNTER3_LO; -typedef union TCC_PERFCOUNTER3_SELECT regTCC_PERFCOUNTER3_SELECT; -typedef union TCP_PERFCOUNTER0_HI regTCP_PERFCOUNTER0_HI; -typedef union TCP_PERFCOUNTER0_LO regTCP_PERFCOUNTER0_LO; -typedef union TCP_PERFCOUNTER0_SELECT regTCP_PERFCOUNTER0_SELECT; -typedef union TCP_PERFCOUNTER0_SELECT1 regTCP_PERFCOUNTER0_SELECT1; -typedef union TCP_PERFCOUNTER1_HI regTCP_PERFCOUNTER1_HI; -typedef union TCP_PERFCOUNTER1_LO regTCP_PERFCOUNTER1_LO; -typedef union TCP_PERFCOUNTER1_SELECT regTCP_PERFCOUNTER1_SELECT; -typedef union TCP_PERFCOUNTER1_SELECT1 regTCP_PERFCOUNTER1_SELECT1; -typedef union TCP_PERFCOUNTER2_HI regTCP_PERFCOUNTER2_HI; -typedef union TCP_PERFCOUNTER2_LO 
regTCP_PERFCOUNTER2_LO; -typedef union TCP_PERFCOUNTER2_SELECT regTCP_PERFCOUNTER2_SELECT; -typedef union TCP_PERFCOUNTER3_HI regTCP_PERFCOUNTER3_HI; -typedef union TCP_PERFCOUNTER3_LO regTCP_PERFCOUNTER3_LO; -typedef union TCP_PERFCOUNTER3_SELECT regTCP_PERFCOUNTER3_SELECT; -typedef union TCP_PERFCOUNTER_FILTER regTCP_PERFCOUNTER_FILTER; -typedef union TCP_PERFCOUNTER_FILTER2 regTCP_PERFCOUNTER_FILTER2; -typedef union TCP_PERFCOUNTER_FILTER_EN regTCP_PERFCOUNTER_FILTER_EN; -typedef union TD_PERFCOUNTER0_HI regTD_PERFCOUNTER0_HI; -typedef union TD_PERFCOUNTER0_LO regTD_PERFCOUNTER0_LO; -typedef union TD_PERFCOUNTER0_SELECT regTD_PERFCOUNTER0_SELECT; -typedef union TD_PERFCOUNTER0_SELECT1 regTD_PERFCOUNTER0_SELECT1; -typedef union TD_PERFCOUNTER1_HI regTD_PERFCOUNTER1_HI; -typedef union TD_PERFCOUNTER1_LO regTD_PERFCOUNTER1_LO; -typedef union TD_PERFCOUNTER1_SELECT regTD_PERFCOUNTER1_SELECT; -typedef union UMCCH0_PerfMonCtl1 regUMCCH0_PerfMonCtl1; -typedef union UMCCH0_PerfMonCtl2 regUMCCH0_PerfMonCtl2; -typedef union UMCCH0_PerfMonCtl3 regUMCCH0_PerfMonCtl3; -typedef union UMCCH0_PerfMonCtl4 regUMCCH0_PerfMonCtl4; -typedef union UMCCH0_PerfMonCtl5 regUMCCH0_PerfMonCtl5; -typedef union UMCCH0_PerfMonCtlClk regUMCCH0_PerfMonCtlClk; -typedef union UMCCH0_PerfMonCtr1_Hi regUMCCH0_PerfMonCtr1_Hi; -typedef union UMCCH0_PerfMonCtr1_Lo regUMCCH0_PerfMonCtr1_Lo; -typedef union UMCCH0_PerfMonCtr2_Hi regUMCCH0_PerfMonCtr2_Hi; -typedef union UMCCH0_PerfMonCtr2_Lo regUMCCH0_PerfMonCtr2_Lo; -typedef union UMCCH0_PerfMonCtr3_Hi regUMCCH0_PerfMonCtr3_Hi; -typedef union UMCCH0_PerfMonCtr3_Lo regUMCCH0_PerfMonCtr3_Lo; -typedef union UMCCH0_PerfMonCtr4_Hi regUMCCH0_PerfMonCtr4_Hi; -typedef union UMCCH0_PerfMonCtr4_Lo regUMCCH0_PerfMonCtr4_Lo; -typedef union UMCCH0_PerfMonCtr5_Hi regUMCCH0_PerfMonCtr5_Hi; -typedef union UMCCH0_PerfMonCtr5_Lo regUMCCH0_PerfMonCtr5_Lo; -typedef union UMCCH0_PerfMonCtrClk_Hi regUMCCH0_PerfMonCtrClk_Hi; -typedef union UMCCH0_PerfMonCtrClk_Lo 
regUMCCH0_PerfMonCtrClk_Lo; -typedef union UMCCH1_PerfMonCtl1 regUMCCH1_PerfMonCtl1; -typedef union UMCCH1_PerfMonCtl2 regUMCCH1_PerfMonCtl2; -typedef union UMCCH1_PerfMonCtl3 regUMCCH1_PerfMonCtl3; -typedef union UMCCH1_PerfMonCtl4 regUMCCH1_PerfMonCtl4; -typedef union UMCCH1_PerfMonCtl5 regUMCCH1_PerfMonCtl5; -typedef union UMCCH1_PerfMonCtlClk regUMCCH1_PerfMonCtlClk; -typedef union UMCCH1_PerfMonCtr1_Hi regUMCCH1_PerfMonCtr1_Hi; -typedef union UMCCH1_PerfMonCtr1_Lo regUMCCH1_PerfMonCtr1_Lo; -typedef union UMCCH1_PerfMonCtr2_Hi regUMCCH1_PerfMonCtr2_Hi; -typedef union UMCCH1_PerfMonCtr2_Lo regUMCCH1_PerfMonCtr2_Lo; -typedef union UMCCH1_PerfMonCtr3_Hi regUMCCH1_PerfMonCtr3_Hi; -typedef union UMCCH1_PerfMonCtr3_Lo regUMCCH1_PerfMonCtr3_Lo; -typedef union UMCCH1_PerfMonCtr4_Hi regUMCCH1_PerfMonCtr4_Hi; -typedef union UMCCH1_PerfMonCtr4_Lo regUMCCH1_PerfMonCtr4_Lo; -typedef union UMCCH1_PerfMonCtr5_Hi regUMCCH1_PerfMonCtr5_Hi; -typedef union UMCCH1_PerfMonCtr5_Lo regUMCCH1_PerfMonCtr5_Lo; -typedef union UMCCH1_PerfMonCtrClk_Hi regUMCCH1_PerfMonCtrClk_Hi; -typedef union UMCCH1_PerfMonCtrClk_Lo regUMCCH1_PerfMonCtrClk_Lo; -typedef union UMCCH2_PerfMonCtl1 regUMCCH2_PerfMonCtl1; -typedef union UMCCH2_PerfMonCtl2 regUMCCH2_PerfMonCtl2; -typedef union UMCCH2_PerfMonCtl3 regUMCCH2_PerfMonCtl3; -typedef union UMCCH2_PerfMonCtl4 regUMCCH2_PerfMonCtl4; -typedef union UMCCH2_PerfMonCtl5 regUMCCH2_PerfMonCtl5; -typedef union UMCCH2_PerfMonCtlClk regUMCCH2_PerfMonCtlClk; -typedef union UMCCH2_PerfMonCtr1_Hi regUMCCH2_PerfMonCtr1_Hi; -typedef union UMCCH2_PerfMonCtr1_Lo regUMCCH2_PerfMonCtr1_Lo; -typedef union UMCCH2_PerfMonCtr2_Hi regUMCCH2_PerfMonCtr2_Hi; -typedef union UMCCH2_PerfMonCtr2_Lo regUMCCH2_PerfMonCtr2_Lo; -typedef union UMCCH2_PerfMonCtr3_Hi regUMCCH2_PerfMonCtr3_Hi; -typedef union UMCCH2_PerfMonCtr3_Lo regUMCCH2_PerfMonCtr3_Lo; -typedef union UMCCH2_PerfMonCtr4_Hi regUMCCH2_PerfMonCtr4_Hi; -typedef union UMCCH2_PerfMonCtr4_Lo regUMCCH2_PerfMonCtr4_Lo; -typedef 
union UMCCH2_PerfMonCtr5_Hi regUMCCH2_PerfMonCtr5_Hi; -typedef union UMCCH2_PerfMonCtr5_Lo regUMCCH2_PerfMonCtr5_Lo; -typedef union UMCCH2_PerfMonCtrClk_Hi regUMCCH2_PerfMonCtrClk_Hi; -typedef union UMCCH2_PerfMonCtrClk_Lo regUMCCH2_PerfMonCtrClk_Lo; -typedef union UMCCH3_PerfMonCtl1 regUMCCH3_PerfMonCtl1; -typedef union UMCCH3_PerfMonCtl2 regUMCCH3_PerfMonCtl2; -typedef union UMCCH3_PerfMonCtl3 regUMCCH3_PerfMonCtl3; -typedef union UMCCH3_PerfMonCtl4 regUMCCH3_PerfMonCtl4; -typedef union UMCCH3_PerfMonCtl5 regUMCCH3_PerfMonCtl5; -typedef union UMCCH3_PerfMonCtlClk regUMCCH3_PerfMonCtlClk; -typedef union UMCCH3_PerfMonCtr1_Hi regUMCCH3_PerfMonCtr1_Hi; -typedef union UMCCH3_PerfMonCtr1_Lo regUMCCH3_PerfMonCtr1_Lo; -typedef union UMCCH3_PerfMonCtr2_Hi regUMCCH3_PerfMonCtr2_Hi; -typedef union UMCCH3_PerfMonCtr2_Lo regUMCCH3_PerfMonCtr2_Lo; -typedef union UMCCH3_PerfMonCtr3_Hi regUMCCH3_PerfMonCtr3_Hi; -typedef union UMCCH3_PerfMonCtr3_Lo regUMCCH3_PerfMonCtr3_Lo; -typedef union UMCCH3_PerfMonCtr4_Hi regUMCCH3_PerfMonCtr4_Hi; -typedef union UMCCH3_PerfMonCtr4_Lo regUMCCH3_PerfMonCtr4_Lo; -typedef union UMCCH3_PerfMonCtr5_Hi regUMCCH3_PerfMonCtr5_Hi; -typedef union UMCCH3_PerfMonCtr5_Lo regUMCCH3_PerfMonCtr5_Lo; -typedef union UMCCH3_PerfMonCtrClk_Hi regUMCCH3_PerfMonCtrClk_Hi; -typedef union UMCCH3_PerfMonCtrClk_Lo regUMCCH3_PerfMonCtrClk_Lo; -typedef union UMCCH4_PerfMonCtl1 regUMCCH4_PerfMonCtl1; -typedef union UMCCH4_PerfMonCtl2 regUMCCH4_PerfMonCtl2; -typedef union UMCCH4_PerfMonCtl3 regUMCCH4_PerfMonCtl3; -typedef union UMCCH4_PerfMonCtl4 regUMCCH4_PerfMonCtl4; -typedef union UMCCH4_PerfMonCtl5 regUMCCH4_PerfMonCtl5; -typedef union UMCCH4_PerfMonCtlClk regUMCCH4_PerfMonCtlClk; -typedef union UMCCH4_PerfMonCtr1_Hi regUMCCH4_PerfMonCtr1_Hi; -typedef union UMCCH4_PerfMonCtr1_Lo regUMCCH4_PerfMonCtr1_Lo; -typedef union UMCCH4_PerfMonCtr2_Hi regUMCCH4_PerfMonCtr2_Hi; -typedef union UMCCH4_PerfMonCtr2_Lo regUMCCH4_PerfMonCtr2_Lo; -typedef union UMCCH4_PerfMonCtr3_Hi 
regUMCCH4_PerfMonCtr3_Hi; -typedef union UMCCH4_PerfMonCtr3_Lo regUMCCH4_PerfMonCtr3_Lo; -typedef union UMCCH4_PerfMonCtr4_Hi regUMCCH4_PerfMonCtr4_Hi; -typedef union UMCCH4_PerfMonCtr4_Lo regUMCCH4_PerfMonCtr4_Lo; -typedef union UMCCH4_PerfMonCtr5_Hi regUMCCH4_PerfMonCtr5_Hi; -typedef union UMCCH4_PerfMonCtr5_Lo regUMCCH4_PerfMonCtr5_Lo; -typedef union UMCCH4_PerfMonCtrClk_Hi regUMCCH4_PerfMonCtrClk_Hi; -typedef union UMCCH4_PerfMonCtrClk_Lo regUMCCH4_PerfMonCtrClk_Lo; -typedef union UMCCH5_PerfMonCtl1 regUMCCH5_PerfMonCtl1; -typedef union UMCCH5_PerfMonCtl2 regUMCCH5_PerfMonCtl2; -typedef union UMCCH5_PerfMonCtl3 regUMCCH5_PerfMonCtl3; -typedef union UMCCH5_PerfMonCtl4 regUMCCH5_PerfMonCtl4; -typedef union UMCCH5_PerfMonCtl5 regUMCCH5_PerfMonCtl5; -typedef union UMCCH5_PerfMonCtlClk regUMCCH5_PerfMonCtlClk; -typedef union UMCCH5_PerfMonCtr1_Hi regUMCCH5_PerfMonCtr1_Hi; -typedef union UMCCH5_PerfMonCtr1_Lo regUMCCH5_PerfMonCtr1_Lo; -typedef union UMCCH5_PerfMonCtr2_Hi regUMCCH5_PerfMonCtr2_Hi; -typedef union UMCCH5_PerfMonCtr2_Lo regUMCCH5_PerfMonCtr2_Lo; -typedef union UMCCH5_PerfMonCtr3_Hi regUMCCH5_PerfMonCtr3_Hi; -typedef union UMCCH5_PerfMonCtr3_Lo regUMCCH5_PerfMonCtr3_Lo; -typedef union UMCCH5_PerfMonCtr4_Hi regUMCCH5_PerfMonCtr4_Hi; -typedef union UMCCH5_PerfMonCtr4_Lo regUMCCH5_PerfMonCtr4_Lo; -typedef union UMCCH5_PerfMonCtr5_Hi regUMCCH5_PerfMonCtr5_Hi; -typedef union UMCCH5_PerfMonCtr5_Lo regUMCCH5_PerfMonCtr5_Lo; -typedef union UMCCH5_PerfMonCtrClk_Hi regUMCCH5_PerfMonCtrClk_Hi; -typedef union UMCCH5_PerfMonCtrClk_Lo regUMCCH5_PerfMonCtrClk_Lo; -typedef union UMCCH6_PerfMonCtl1 regUMCCH6_PerfMonCtl1; -typedef union UMCCH6_PerfMonCtl2 regUMCCH6_PerfMonCtl2; -typedef union UMCCH6_PerfMonCtl3 regUMCCH6_PerfMonCtl3; -typedef union UMCCH6_PerfMonCtl4 regUMCCH6_PerfMonCtl4; -typedef union UMCCH6_PerfMonCtl5 regUMCCH6_PerfMonCtl5; -typedef union UMCCH6_PerfMonCtlClk regUMCCH6_PerfMonCtlClk; -typedef union UMCCH6_PerfMonCtr1_Hi regUMCCH6_PerfMonCtr1_Hi; 
-typedef union UMCCH6_PerfMonCtr1_Lo regUMCCH6_PerfMonCtr1_Lo; -typedef union UMCCH6_PerfMonCtr2_Hi regUMCCH6_PerfMonCtr2_Hi; -typedef union UMCCH6_PerfMonCtr2_Lo regUMCCH6_PerfMonCtr2_Lo; -typedef union UMCCH6_PerfMonCtr3_Hi regUMCCH6_PerfMonCtr3_Hi; -typedef union UMCCH6_PerfMonCtr3_Lo regUMCCH6_PerfMonCtr3_Lo; -typedef union UMCCH6_PerfMonCtr4_Hi regUMCCH6_PerfMonCtr4_Hi; -typedef union UMCCH6_PerfMonCtr4_Lo regUMCCH6_PerfMonCtr4_Lo; -typedef union UMCCH6_PerfMonCtr5_Hi regUMCCH6_PerfMonCtr5_Hi; -typedef union UMCCH6_PerfMonCtr5_Lo regUMCCH6_PerfMonCtr5_Lo; -typedef union UMCCH6_PerfMonCtrClk_Hi regUMCCH6_PerfMonCtrClk_Hi; -typedef union UMCCH6_PerfMonCtrClk_Lo regUMCCH6_PerfMonCtrClk_Lo; -typedef union UMCCH7_PerfMonCtl1 regUMCCH7_PerfMonCtl1; -typedef union UMCCH7_PerfMonCtl2 regUMCCH7_PerfMonCtl2; -typedef union UMCCH7_PerfMonCtl3 regUMCCH7_PerfMonCtl3; -typedef union UMCCH7_PerfMonCtl4 regUMCCH7_PerfMonCtl4; -typedef union UMCCH7_PerfMonCtl5 regUMCCH7_PerfMonCtl5; -typedef union UMCCH7_PerfMonCtlClk regUMCCH7_PerfMonCtlClk; -typedef union UMCCH7_PerfMonCtr1_Hi regUMCCH7_PerfMonCtr1_Hi; -typedef union UMCCH7_PerfMonCtr1_Lo regUMCCH7_PerfMonCtr1_Lo; -typedef union UMCCH7_PerfMonCtr2_Hi regUMCCH7_PerfMonCtr2_Hi; -typedef union UMCCH7_PerfMonCtr2_Lo regUMCCH7_PerfMonCtr2_Lo; -typedef union UMCCH7_PerfMonCtr3_Hi regUMCCH7_PerfMonCtr3_Hi; -typedef union UMCCH7_PerfMonCtr3_Lo regUMCCH7_PerfMonCtr3_Lo; -typedef union UMCCH7_PerfMonCtr4_Hi regUMCCH7_PerfMonCtr4_Hi; -typedef union UMCCH7_PerfMonCtr4_Lo regUMCCH7_PerfMonCtr4_Lo; -typedef union UMCCH7_PerfMonCtr5_Hi regUMCCH7_PerfMonCtr5_Hi; -typedef union UMCCH7_PerfMonCtr5_Lo regUMCCH7_PerfMonCtr5_Lo; -typedef union UMCCH7_PerfMonCtrClk_Hi regUMCCH7_PerfMonCtrClk_Hi; -typedef union UMCCH7_PerfMonCtrClk_Lo regUMCCH7_PerfMonCtrClk_Lo; -typedef union UMCCH8_PerfMonCtl1 regUMCCH8_PerfMonCtl1; -typedef union UMCCH8_PerfMonCtl2 regUMCCH8_PerfMonCtl2; -typedef union UMCCH8_PerfMonCtl3 regUMCCH8_PerfMonCtl3; -typedef union 
UMCCH8_PerfMonCtl4 regUMCCH8_PerfMonCtl4; -typedef union UMCCH8_PerfMonCtl5 regUMCCH8_PerfMonCtl5; -typedef union UMCCH8_PerfMonCtlClk regUMCCH8_PerfMonCtlClk; -typedef union UMCCH8_PerfMonCtr1_Hi regUMCCH8_PerfMonCtr1_Hi; -typedef union UMCCH8_PerfMonCtr1_Lo regUMCCH8_PerfMonCtr1_Lo; -typedef union UMCCH8_PerfMonCtr2_Hi regUMCCH8_PerfMonCtr2_Hi; -typedef union UMCCH8_PerfMonCtr2_Lo regUMCCH8_PerfMonCtr2_Lo; -typedef union UMCCH8_PerfMonCtr3_Hi regUMCCH8_PerfMonCtr3_Hi; -typedef union UMCCH8_PerfMonCtr3_Lo regUMCCH8_PerfMonCtr3_Lo; -typedef union UMCCH8_PerfMonCtr4_Hi regUMCCH8_PerfMonCtr4_Hi; -typedef union UMCCH8_PerfMonCtr4_Lo regUMCCH8_PerfMonCtr4_Lo; -typedef union UMCCH8_PerfMonCtr5_Hi regUMCCH8_PerfMonCtr5_Hi; -typedef union UMCCH8_PerfMonCtr5_Lo regUMCCH8_PerfMonCtr5_Lo; -typedef union UMCCH8_PerfMonCtrClk_Hi regUMCCH8_PerfMonCtrClk_Hi; -typedef union UMCCH8_PerfMonCtrClk_Lo regUMCCH8_PerfMonCtrClk_Lo; -typedef union UMCCH9_PerfMonCtl1 regUMCCH9_PerfMonCtl1; -typedef union UMCCH9_PerfMonCtl2 regUMCCH9_PerfMonCtl2; -typedef union UMCCH9_PerfMonCtl3 regUMCCH9_PerfMonCtl3; -typedef union UMCCH9_PerfMonCtl4 regUMCCH9_PerfMonCtl4; -typedef union UMCCH9_PerfMonCtl5 regUMCCH9_PerfMonCtl5; -typedef union UMCCH9_PerfMonCtlClk regUMCCH9_PerfMonCtlClk; -typedef union UMCCH9_PerfMonCtr1_Hi regUMCCH9_PerfMonCtr1_Hi; -typedef union UMCCH9_PerfMonCtr1_Lo regUMCCH9_PerfMonCtr1_Lo; -typedef union UMCCH9_PerfMonCtr2_Hi regUMCCH9_PerfMonCtr2_Hi; -typedef union UMCCH9_PerfMonCtr2_Lo regUMCCH9_PerfMonCtr2_Lo; -typedef union UMCCH9_PerfMonCtr3_Hi regUMCCH9_PerfMonCtr3_Hi; -typedef union UMCCH9_PerfMonCtr3_Lo regUMCCH9_PerfMonCtr3_Lo; -typedef union UMCCH9_PerfMonCtr4_Hi regUMCCH9_PerfMonCtr4_Hi; -typedef union UMCCH9_PerfMonCtr4_Lo regUMCCH9_PerfMonCtr4_Lo; -typedef union UMCCH9_PerfMonCtr5_Hi regUMCCH9_PerfMonCtr5_Hi; -typedef union UMCCH9_PerfMonCtr5_Lo regUMCCH9_PerfMonCtr5_Lo; -typedef union UMCCH9_PerfMonCtrClk_Hi regUMCCH9_PerfMonCtrClk_Hi; -typedef union 
UMCCH9_PerfMonCtrClk_Lo regUMCCH9_PerfMonCtrClk_Lo; -typedef union UMCCH10_PerfMonCtl1 regUMCCH10_PerfMonCtl1; -typedef union UMCCH10_PerfMonCtl2 regUMCCH10_PerfMonCtl2; -typedef union UMCCH10_PerfMonCtl3 regUMCCH10_PerfMonCtl3; -typedef union UMCCH10_PerfMonCtl4 regUMCCH10_PerfMonCtl4; -typedef union UMCCH10_PerfMonCtl5 regUMCCH10_PerfMonCtl5; -typedef union UMCCH10_PerfMonCtlClk regUMCCH10_PerfMonCtlClk; -typedef union UMCCH10_PerfMonCtr1_Hi regUMCCH10_PerfMonCtr1_Hi; -typedef union UMCCH10_PerfMonCtr1_Lo regUMCCH10_PerfMonCtr1_Lo; -typedef union UMCCH10_PerfMonCtr2_Hi regUMCCH10_PerfMonCtr2_Hi; -typedef union UMCCH10_PerfMonCtr2_Lo regUMCCH10_PerfMonCtr2_Lo; -typedef union UMCCH10_PerfMonCtr3_Hi regUMCCH10_PerfMonCtr3_Hi; -typedef union UMCCH10_PerfMonCtr3_Lo regUMCCH10_PerfMonCtr3_Lo; -typedef union UMCCH10_PerfMonCtr4_Hi regUMCCH10_PerfMonCtr4_Hi; -typedef union UMCCH10_PerfMonCtr4_Lo regUMCCH10_PerfMonCtr4_Lo; -typedef union UMCCH10_PerfMonCtr5_Hi regUMCCH10_PerfMonCtr5_Hi; -typedef union UMCCH10_PerfMonCtr5_Lo regUMCCH10_PerfMonCtr5_Lo; -typedef union UMCCH10_PerfMonCtrClk_Hi regUMCCH10_PerfMonCtrClk_Hi; -typedef union UMCCH10_PerfMonCtrClk_Lo regUMCCH10_PerfMonCtrClk_Lo; -typedef union UMCCH11_PerfMonCtl1 regUMCCH11_PerfMonCtl1; -typedef union UMCCH11_PerfMonCtl2 regUMCCH11_PerfMonCtl2; -typedef union UMCCH11_PerfMonCtl3 regUMCCH11_PerfMonCtl3; -typedef union UMCCH11_PerfMonCtl4 regUMCCH11_PerfMonCtl4; -typedef union UMCCH11_PerfMonCtl5 regUMCCH11_PerfMonCtl5; -typedef union UMCCH11_PerfMonCtlClk regUMCCH11_PerfMonCtlClk; -typedef union UMCCH11_PerfMonCtr1_Hi regUMCCH11_PerfMonCtr1_Hi; -typedef union UMCCH11_PerfMonCtr1_Lo regUMCCH11_PerfMonCtr1_Lo; -typedef union UMCCH11_PerfMonCtr2_Hi regUMCCH11_PerfMonCtr2_Hi; -typedef union UMCCH11_PerfMonCtr2_Lo regUMCCH11_PerfMonCtr2_Lo; -typedef union UMCCH11_PerfMonCtr3_Hi regUMCCH11_PerfMonCtr3_Hi; -typedef union UMCCH11_PerfMonCtr3_Lo regUMCCH11_PerfMonCtr3_Lo; -typedef union UMCCH11_PerfMonCtr4_Hi 
regUMCCH11_PerfMonCtr4_Hi; -typedef union UMCCH11_PerfMonCtr4_Lo regUMCCH11_PerfMonCtr4_Lo; -typedef union UMCCH11_PerfMonCtr5_Hi regUMCCH11_PerfMonCtr5_Hi; -typedef union UMCCH11_PerfMonCtr5_Lo regUMCCH11_PerfMonCtr5_Lo; -typedef union UMCCH11_PerfMonCtrClk_Hi regUMCCH11_PerfMonCtrClk_Hi; -typedef union UMCCH11_PerfMonCtrClk_Lo regUMCCH11_PerfMonCtrClk_Lo; -typedef union UMCCH12_PerfMonCtl1 regUMCCH12_PerfMonCtl1; -typedef union UMCCH12_PerfMonCtl2 regUMCCH12_PerfMonCtl2; -typedef union UMCCH12_PerfMonCtl3 regUMCCH12_PerfMonCtl3; -typedef union UMCCH12_PerfMonCtl4 regUMCCH12_PerfMonCtl4; -typedef union UMCCH12_PerfMonCtl5 regUMCCH12_PerfMonCtl5; -typedef union UMCCH12_PerfMonCtlClk regUMCCH12_PerfMonCtlClk; -typedef union UMCCH12_PerfMonCtr1_Hi regUMCCH12_PerfMonCtr1_Hi; -typedef union UMCCH12_PerfMonCtr1_Lo regUMCCH12_PerfMonCtr1_Lo; -typedef union UMCCH12_PerfMonCtr2_Hi regUMCCH12_PerfMonCtr2_Hi; -typedef union UMCCH12_PerfMonCtr2_Lo regUMCCH12_PerfMonCtr2_Lo; -typedef union UMCCH12_PerfMonCtr3_Hi regUMCCH12_PerfMonCtr3_Hi; -typedef union UMCCH12_PerfMonCtr3_Lo regUMCCH12_PerfMonCtr3_Lo; -typedef union UMCCH12_PerfMonCtr4_Hi regUMCCH12_PerfMonCtr4_Hi; -typedef union UMCCH12_PerfMonCtr4_Lo regUMCCH12_PerfMonCtr4_Lo; -typedef union UMCCH12_PerfMonCtr5_Hi regUMCCH12_PerfMonCtr5_Hi; -typedef union UMCCH12_PerfMonCtr5_Lo regUMCCH12_PerfMonCtr5_Lo; -typedef union UMCCH12_PerfMonCtrClk_Hi regUMCCH12_PerfMonCtrClk_Hi; -typedef union UMCCH12_PerfMonCtrClk_Lo regUMCCH12_PerfMonCtrClk_Lo; -typedef union UMCCH13_PerfMonCtl1 regUMCCH13_PerfMonCtl1; -typedef union UMCCH13_PerfMonCtl2 regUMCCH13_PerfMonCtl2; -typedef union UMCCH13_PerfMonCtl3 regUMCCH13_PerfMonCtl3; -typedef union UMCCH13_PerfMonCtl4 regUMCCH13_PerfMonCtl4; -typedef union UMCCH13_PerfMonCtl5 regUMCCH13_PerfMonCtl5; -typedef union UMCCH13_PerfMonCtlClk regUMCCH13_PerfMonCtlClk; -typedef union UMCCH13_PerfMonCtr1_Hi regUMCCH13_PerfMonCtr1_Hi; -typedef union UMCCH13_PerfMonCtr1_Lo regUMCCH13_PerfMonCtr1_Lo; 
-typedef union UMCCH13_PerfMonCtr2_Hi regUMCCH13_PerfMonCtr2_Hi; -typedef union UMCCH13_PerfMonCtr2_Lo regUMCCH13_PerfMonCtr2_Lo; -typedef union UMCCH13_PerfMonCtr3_Hi regUMCCH13_PerfMonCtr3_Hi; -typedef union UMCCH13_PerfMonCtr3_Lo regUMCCH13_PerfMonCtr3_Lo; -typedef union UMCCH13_PerfMonCtr4_Hi regUMCCH13_PerfMonCtr4_Hi; -typedef union UMCCH13_PerfMonCtr4_Lo regUMCCH13_PerfMonCtr4_Lo; -typedef union UMCCH13_PerfMonCtr5_Hi regUMCCH13_PerfMonCtr5_Hi; -typedef union UMCCH13_PerfMonCtr5_Lo regUMCCH13_PerfMonCtr5_Lo; -typedef union UMCCH13_PerfMonCtrClk_Hi regUMCCH13_PerfMonCtrClk_Hi; -typedef union UMCCH13_PerfMonCtrClk_Lo regUMCCH13_PerfMonCtrClk_Lo; -typedef union UMCCH14_PerfMonCtl1 regUMCCH14_PerfMonCtl1; -typedef union UMCCH14_PerfMonCtl2 regUMCCH14_PerfMonCtl2; -typedef union UMCCH14_PerfMonCtl3 regUMCCH14_PerfMonCtl3; -typedef union UMCCH14_PerfMonCtl4 regUMCCH14_PerfMonCtl4; -typedef union UMCCH14_PerfMonCtl5 regUMCCH14_PerfMonCtl5; -typedef union UMCCH14_PerfMonCtlClk regUMCCH14_PerfMonCtlClk; -typedef union UMCCH14_PerfMonCtr1_Hi regUMCCH14_PerfMonCtr1_Hi; -typedef union UMCCH14_PerfMonCtr1_Lo regUMCCH14_PerfMonCtr1_Lo; -typedef union UMCCH14_PerfMonCtr2_Hi regUMCCH14_PerfMonCtr2_Hi; -typedef union UMCCH14_PerfMonCtr2_Lo regUMCCH14_PerfMonCtr2_Lo; -typedef union UMCCH14_PerfMonCtr3_Hi regUMCCH14_PerfMonCtr3_Hi; -typedef union UMCCH14_PerfMonCtr3_Lo regUMCCH14_PerfMonCtr3_Lo; -typedef union UMCCH14_PerfMonCtr4_Hi regUMCCH14_PerfMonCtr4_Hi; -typedef union UMCCH14_PerfMonCtr4_Lo regUMCCH14_PerfMonCtr4_Lo; -typedef union UMCCH14_PerfMonCtr5_Hi regUMCCH14_PerfMonCtr5_Hi; -typedef union UMCCH14_PerfMonCtr5_Lo regUMCCH14_PerfMonCtr5_Lo; -typedef union UMCCH14_PerfMonCtrClk_Hi regUMCCH14_PerfMonCtrClk_Hi; -typedef union UMCCH14_PerfMonCtrClk_Lo regUMCCH14_PerfMonCtrClk_Lo; -typedef union UMCCH15_PerfMonCtl1 regUMCCH15_PerfMonCtl1; -typedef union UMCCH15_PerfMonCtl2 regUMCCH15_PerfMonCtl2; -typedef union UMCCH15_PerfMonCtl3 regUMCCH15_PerfMonCtl3; -typedef union 
UMCCH15_PerfMonCtl4 regUMCCH15_PerfMonCtl4; -typedef union UMCCH15_PerfMonCtl5 regUMCCH15_PerfMonCtl5; -typedef union UMCCH15_PerfMonCtlClk regUMCCH15_PerfMonCtlClk; -typedef union UMCCH15_PerfMonCtr1_Hi regUMCCH15_PerfMonCtr1_Hi; -typedef union UMCCH15_PerfMonCtr1_Lo regUMCCH15_PerfMonCtr1_Lo; -typedef union UMCCH15_PerfMonCtr2_Hi regUMCCH15_PerfMonCtr2_Hi; -typedef union UMCCH15_PerfMonCtr2_Lo regUMCCH15_PerfMonCtr2_Lo; -typedef union UMCCH15_PerfMonCtr3_Hi regUMCCH15_PerfMonCtr3_Hi; -typedef union UMCCH15_PerfMonCtr3_Lo regUMCCH15_PerfMonCtr3_Lo; -typedef union UMCCH15_PerfMonCtr4_Hi regUMCCH15_PerfMonCtr4_Hi; -typedef union UMCCH15_PerfMonCtr4_Lo regUMCCH15_PerfMonCtr4_Lo; -typedef union UMCCH15_PerfMonCtr5_Hi regUMCCH15_PerfMonCtr5_Hi; -typedef union UMCCH15_PerfMonCtr5_Lo regUMCCH15_PerfMonCtr5_Lo; -typedef union UMCCH15_PerfMonCtrClk_Hi regUMCCH15_PerfMonCtrClk_Hi; -typedef union UMCCH15_PerfMonCtrClk_Lo regUMCCH15_PerfMonCtrClk_Lo; -typedef union UMCCH16_PerfMonCtl1 regUMCCH16_PerfMonCtl1; -typedef union UMCCH16_PerfMonCtl2 regUMCCH16_PerfMonCtl2; -typedef union UMCCH16_PerfMonCtl3 regUMCCH16_PerfMonCtl3; -typedef union UMCCH16_PerfMonCtl4 regUMCCH16_PerfMonCtl4; -typedef union UMCCH16_PerfMonCtl5 regUMCCH16_PerfMonCtl5; -typedef union UMCCH16_PerfMonCtlClk regUMCCH16_PerfMonCtlClk; -typedef union UMCCH16_PerfMonCtr1_Hi regUMCCH16_PerfMonCtr1_Hi; -typedef union UMCCH16_PerfMonCtr1_Lo regUMCCH16_PerfMonCtr1_Lo; -typedef union UMCCH16_PerfMonCtr2_Hi regUMCCH16_PerfMonCtr2_Hi; -typedef union UMCCH16_PerfMonCtr2_Lo regUMCCH16_PerfMonCtr2_Lo; -typedef union UMCCH16_PerfMonCtr3_Hi regUMCCH16_PerfMonCtr3_Hi; -typedef union UMCCH16_PerfMonCtr3_Lo regUMCCH16_PerfMonCtr3_Lo; -typedef union UMCCH16_PerfMonCtr4_Hi regUMCCH16_PerfMonCtr4_Hi; -typedef union UMCCH16_PerfMonCtr4_Lo regUMCCH16_PerfMonCtr4_Lo; -typedef union UMCCH16_PerfMonCtr5_Hi regUMCCH16_PerfMonCtr5_Hi; -typedef union UMCCH16_PerfMonCtr5_Lo regUMCCH16_PerfMonCtr5_Lo; -typedef union 
UMCCH16_PerfMonCtrClk_Hi regUMCCH16_PerfMonCtrClk_Hi; -typedef union UMCCH16_PerfMonCtrClk_Lo regUMCCH16_PerfMonCtrClk_Lo; -typedef union UMCCH17_PerfMonCtl1 regUMCCH17_PerfMonCtl1; -typedef union UMCCH17_PerfMonCtl2 regUMCCH17_PerfMonCtl2; -typedef union UMCCH17_PerfMonCtl3 regUMCCH17_PerfMonCtl3; -typedef union UMCCH17_PerfMonCtl4 regUMCCH17_PerfMonCtl4; -typedef union UMCCH17_PerfMonCtl5 regUMCCH17_PerfMonCtl5; -typedef union UMCCH17_PerfMonCtlClk regUMCCH17_PerfMonCtlClk; -typedef union UMCCH17_PerfMonCtr1_Hi regUMCCH17_PerfMonCtr1_Hi; -typedef union UMCCH17_PerfMonCtr1_Lo regUMCCH17_PerfMonCtr1_Lo; -typedef union UMCCH17_PerfMonCtr2_Hi regUMCCH17_PerfMonCtr2_Hi; -typedef union UMCCH17_PerfMonCtr2_Lo regUMCCH17_PerfMonCtr2_Lo; -typedef union UMCCH17_PerfMonCtr3_Hi regUMCCH17_PerfMonCtr3_Hi; -typedef union UMCCH17_PerfMonCtr3_Lo regUMCCH17_PerfMonCtr3_Lo; -typedef union UMCCH17_PerfMonCtr4_Hi regUMCCH17_PerfMonCtr4_Hi; -typedef union UMCCH17_PerfMonCtr4_Lo regUMCCH17_PerfMonCtr4_Lo; -typedef union UMCCH17_PerfMonCtr5_Hi regUMCCH17_PerfMonCtr5_Hi; -typedef union UMCCH17_PerfMonCtr5_Lo regUMCCH17_PerfMonCtr5_Lo; -typedef union UMCCH17_PerfMonCtrClk_Hi regUMCCH17_PerfMonCtrClk_Hi; -typedef union UMCCH17_PerfMonCtrClk_Lo regUMCCH17_PerfMonCtrClk_Lo; -typedef union UMCCH18_PerfMonCtl1 regUMCCH18_PerfMonCtl1; -typedef union UMCCH18_PerfMonCtl2 regUMCCH18_PerfMonCtl2; -typedef union UMCCH18_PerfMonCtl3 regUMCCH18_PerfMonCtl3; -typedef union UMCCH18_PerfMonCtl4 regUMCCH18_PerfMonCtl4; -typedef union UMCCH18_PerfMonCtl5 regUMCCH18_PerfMonCtl5; -typedef union UMCCH18_PerfMonCtlClk regUMCCH18_PerfMonCtlClk; -typedef union UMCCH18_PerfMonCtr1_Hi regUMCCH18_PerfMonCtr1_Hi; -typedef union UMCCH18_PerfMonCtr1_Lo regUMCCH18_PerfMonCtr1_Lo; -typedef union UMCCH18_PerfMonCtr2_Hi regUMCCH18_PerfMonCtr2_Hi; -typedef union UMCCH18_PerfMonCtr2_Lo regUMCCH18_PerfMonCtr2_Lo; -typedef union UMCCH18_PerfMonCtr3_Hi regUMCCH18_PerfMonCtr3_Hi; -typedef union UMCCH18_PerfMonCtr3_Lo 
regUMCCH18_PerfMonCtr3_Lo; -typedef union UMCCH18_PerfMonCtr4_Hi regUMCCH18_PerfMonCtr4_Hi; -typedef union UMCCH18_PerfMonCtr4_Lo regUMCCH18_PerfMonCtr4_Lo; -typedef union UMCCH18_PerfMonCtr5_Hi regUMCCH18_PerfMonCtr5_Hi; -typedef union UMCCH18_PerfMonCtr5_Lo regUMCCH18_PerfMonCtr5_Lo; -typedef union UMCCH18_PerfMonCtrClk_Hi regUMCCH18_PerfMonCtrClk_Hi; -typedef union UMCCH18_PerfMonCtrClk_Lo regUMCCH18_PerfMonCtrClk_Lo; -typedef union UMCCH19_PerfMonCtl1 regUMCCH19_PerfMonCtl1; -typedef union UMCCH19_PerfMonCtl2 regUMCCH19_PerfMonCtl2; -typedef union UMCCH19_PerfMonCtl3 regUMCCH19_PerfMonCtl3; -typedef union UMCCH19_PerfMonCtl4 regUMCCH19_PerfMonCtl4; -typedef union UMCCH19_PerfMonCtl5 regUMCCH19_PerfMonCtl5; -typedef union UMCCH19_PerfMonCtlClk regUMCCH19_PerfMonCtlClk; -typedef union UMCCH19_PerfMonCtr1_Hi regUMCCH19_PerfMonCtr1_Hi; -typedef union UMCCH19_PerfMonCtr1_Lo regUMCCH19_PerfMonCtr1_Lo; -typedef union UMCCH19_PerfMonCtr2_Hi regUMCCH19_PerfMonCtr2_Hi; -typedef union UMCCH19_PerfMonCtr2_Lo regUMCCH19_PerfMonCtr2_Lo; -typedef union UMCCH19_PerfMonCtr3_Hi regUMCCH19_PerfMonCtr3_Hi; -typedef union UMCCH19_PerfMonCtr3_Lo regUMCCH19_PerfMonCtr3_Lo; -typedef union UMCCH19_PerfMonCtr4_Hi regUMCCH19_PerfMonCtr4_Hi; -typedef union UMCCH19_PerfMonCtr4_Lo regUMCCH19_PerfMonCtr4_Lo; -typedef union UMCCH19_PerfMonCtr5_Hi regUMCCH19_PerfMonCtr5_Hi; -typedef union UMCCH19_PerfMonCtr5_Lo regUMCCH19_PerfMonCtr5_Lo; -typedef union UMCCH19_PerfMonCtrClk_Hi regUMCCH19_PerfMonCtrClk_Hi; -typedef union UMCCH19_PerfMonCtrClk_Lo regUMCCH19_PerfMonCtrClk_Lo; -typedef union UMCCH20_PerfMonCtl1 regUMCCH20_PerfMonCtl1; -typedef union UMCCH20_PerfMonCtl2 regUMCCH20_PerfMonCtl2; -typedef union UMCCH20_PerfMonCtl3 regUMCCH20_PerfMonCtl3; -typedef union UMCCH20_PerfMonCtl4 regUMCCH20_PerfMonCtl4; -typedef union UMCCH20_PerfMonCtl5 regUMCCH20_PerfMonCtl5; -typedef union UMCCH20_PerfMonCtlClk regUMCCH20_PerfMonCtlClk; -typedef union UMCCH20_PerfMonCtr1_Hi regUMCCH20_PerfMonCtr1_Hi; 
-typedef union UMCCH20_PerfMonCtr1_Lo regUMCCH20_PerfMonCtr1_Lo; -typedef union UMCCH20_PerfMonCtr2_Hi regUMCCH20_PerfMonCtr2_Hi; -typedef union UMCCH20_PerfMonCtr2_Lo regUMCCH20_PerfMonCtr2_Lo; -typedef union UMCCH20_PerfMonCtr3_Hi regUMCCH20_PerfMonCtr3_Hi; -typedef union UMCCH20_PerfMonCtr3_Lo regUMCCH20_PerfMonCtr3_Lo; -typedef union UMCCH20_PerfMonCtr4_Hi regUMCCH20_PerfMonCtr4_Hi; -typedef union UMCCH20_PerfMonCtr4_Lo regUMCCH20_PerfMonCtr4_Lo; -typedef union UMCCH20_PerfMonCtr5_Hi regUMCCH20_PerfMonCtr5_Hi; -typedef union UMCCH20_PerfMonCtr5_Lo regUMCCH20_PerfMonCtr5_Lo; -typedef union UMCCH20_PerfMonCtrClk_Hi regUMCCH20_PerfMonCtrClk_Hi; -typedef union UMCCH20_PerfMonCtrClk_Lo regUMCCH20_PerfMonCtrClk_Lo; -typedef union UMCCH21_PerfMonCtl1 regUMCCH21_PerfMonCtl1; -typedef union UMCCH21_PerfMonCtl2 regUMCCH21_PerfMonCtl2; -typedef union UMCCH21_PerfMonCtl3 regUMCCH21_PerfMonCtl3; -typedef union UMCCH21_PerfMonCtl4 regUMCCH21_PerfMonCtl4; -typedef union UMCCH21_PerfMonCtl5 regUMCCH21_PerfMonCtl5; -typedef union UMCCH21_PerfMonCtlClk regUMCCH21_PerfMonCtlClk; -typedef union UMCCH21_PerfMonCtr1_Hi regUMCCH21_PerfMonCtr1_Hi; -typedef union UMCCH21_PerfMonCtr1_Lo regUMCCH21_PerfMonCtr1_Lo; -typedef union UMCCH21_PerfMonCtr2_Hi regUMCCH21_PerfMonCtr2_Hi; -typedef union UMCCH21_PerfMonCtr2_Lo regUMCCH21_PerfMonCtr2_Lo; -typedef union UMCCH21_PerfMonCtr3_Hi regUMCCH21_PerfMonCtr3_Hi; -typedef union UMCCH21_PerfMonCtr3_Lo regUMCCH21_PerfMonCtr3_Lo; -typedef union UMCCH21_PerfMonCtr4_Hi regUMCCH21_PerfMonCtr4_Hi; -typedef union UMCCH21_PerfMonCtr4_Lo regUMCCH21_PerfMonCtr4_Lo; -typedef union UMCCH21_PerfMonCtr5_Hi regUMCCH21_PerfMonCtr5_Hi; -typedef union UMCCH21_PerfMonCtr5_Lo regUMCCH21_PerfMonCtr5_Lo; -typedef union UMCCH21_PerfMonCtrClk_Hi regUMCCH21_PerfMonCtrClk_Hi; -typedef union UMCCH21_PerfMonCtrClk_Lo regUMCCH21_PerfMonCtrClk_Lo; -typedef union UMCCH22_PerfMonCtl1 regUMCCH22_PerfMonCtl1; -typedef union UMCCH22_PerfMonCtl2 regUMCCH22_PerfMonCtl2; -typedef 
union UMCCH22_PerfMonCtl3 regUMCCH22_PerfMonCtl3; -typedef union UMCCH22_PerfMonCtl4 regUMCCH22_PerfMonCtl4; -typedef union UMCCH22_PerfMonCtl5 regUMCCH22_PerfMonCtl5; -typedef union UMCCH22_PerfMonCtlClk regUMCCH22_PerfMonCtlClk; -typedef union UMCCH22_PerfMonCtr1_Hi regUMCCH22_PerfMonCtr1_Hi; -typedef union UMCCH22_PerfMonCtr1_Lo regUMCCH22_PerfMonCtr1_Lo; -typedef union UMCCH22_PerfMonCtr2_Hi regUMCCH22_PerfMonCtr2_Hi; -typedef union UMCCH22_PerfMonCtr2_Lo regUMCCH22_PerfMonCtr2_Lo; -typedef union UMCCH22_PerfMonCtr3_Hi regUMCCH22_PerfMonCtr3_Hi; -typedef union UMCCH22_PerfMonCtr3_Lo regUMCCH22_PerfMonCtr3_Lo; -typedef union UMCCH22_PerfMonCtr4_Hi regUMCCH22_PerfMonCtr4_Hi; -typedef union UMCCH22_PerfMonCtr4_Lo regUMCCH22_PerfMonCtr4_Lo; -typedef union UMCCH22_PerfMonCtr5_Hi regUMCCH22_PerfMonCtr5_Hi; -typedef union UMCCH22_PerfMonCtr5_Lo regUMCCH22_PerfMonCtr5_Lo; -typedef union UMCCH22_PerfMonCtrClk_Hi regUMCCH22_PerfMonCtrClk_Hi; -typedef union UMCCH22_PerfMonCtrClk_Lo regUMCCH22_PerfMonCtrClk_Lo; -typedef union UMCCH23_PerfMonCtl1 regUMCCH23_PerfMonCtl1; -typedef union UMCCH23_PerfMonCtl2 regUMCCH23_PerfMonCtl2; -typedef union UMCCH23_PerfMonCtl3 regUMCCH23_PerfMonCtl3; -typedef union UMCCH23_PerfMonCtl4 regUMCCH23_PerfMonCtl4; -typedef union UMCCH23_PerfMonCtl5 regUMCCH23_PerfMonCtl5; -typedef union UMCCH23_PerfMonCtlClk regUMCCH23_PerfMonCtlClk; -typedef union UMCCH23_PerfMonCtr1_Hi regUMCCH23_PerfMonCtr1_Hi; -typedef union UMCCH23_PerfMonCtr1_Lo regUMCCH23_PerfMonCtr1_Lo; -typedef union UMCCH23_PerfMonCtr2_Hi regUMCCH23_PerfMonCtr2_Hi; -typedef union UMCCH23_PerfMonCtr2_Lo regUMCCH23_PerfMonCtr2_Lo; -typedef union UMCCH23_PerfMonCtr3_Hi regUMCCH23_PerfMonCtr3_Hi; -typedef union UMCCH23_PerfMonCtr3_Lo regUMCCH23_PerfMonCtr3_Lo; -typedef union UMCCH23_PerfMonCtr4_Hi regUMCCH23_PerfMonCtr4_Hi; -typedef union UMCCH23_PerfMonCtr4_Lo regUMCCH23_PerfMonCtr4_Lo; -typedef union UMCCH23_PerfMonCtr5_Hi regUMCCH23_PerfMonCtr5_Hi; -typedef union UMCCH23_PerfMonCtr5_Lo 
regUMCCH23_PerfMonCtr5_Lo; -typedef union UMCCH23_PerfMonCtrClk_Hi regUMCCH23_PerfMonCtrClk_Hi; -typedef union UMCCH23_PerfMonCtrClk_Lo regUMCCH23_PerfMonCtrClk_Lo; -typedef union UMCCH24_PerfMonCtl1 regUMCCH24_PerfMonCtl1; -typedef union UMCCH24_PerfMonCtl2 regUMCCH24_PerfMonCtl2; -typedef union UMCCH24_PerfMonCtl3 regUMCCH24_PerfMonCtl3; -typedef union UMCCH24_PerfMonCtl4 regUMCCH24_PerfMonCtl4; -typedef union UMCCH24_PerfMonCtl5 regUMCCH24_PerfMonCtl5; -typedef union UMCCH24_PerfMonCtlClk regUMCCH24_PerfMonCtlClk; -typedef union UMCCH24_PerfMonCtr1_Hi regUMCCH24_PerfMonCtr1_Hi; -typedef union UMCCH24_PerfMonCtr1_Lo regUMCCH24_PerfMonCtr1_Lo; -typedef union UMCCH24_PerfMonCtr2_Hi regUMCCH24_PerfMonCtr2_Hi; -typedef union UMCCH24_PerfMonCtr2_Lo regUMCCH24_PerfMonCtr2_Lo; -typedef union UMCCH24_PerfMonCtr3_Hi regUMCCH24_PerfMonCtr3_Hi; -typedef union UMCCH24_PerfMonCtr3_Lo regUMCCH24_PerfMonCtr3_Lo; -typedef union UMCCH24_PerfMonCtr4_Hi regUMCCH24_PerfMonCtr4_Hi; -typedef union UMCCH24_PerfMonCtr4_Lo regUMCCH24_PerfMonCtr4_Lo; -typedef union UMCCH24_PerfMonCtr5_Hi regUMCCH24_PerfMonCtr5_Hi; -typedef union UMCCH24_PerfMonCtr5_Lo regUMCCH24_PerfMonCtr5_Lo; -typedef union UMCCH24_PerfMonCtrClk_Hi regUMCCH24_PerfMonCtrClk_Hi; -typedef union UMCCH24_PerfMonCtrClk_Lo regUMCCH24_PerfMonCtrClk_Lo; -typedef union UMCCH25_PerfMonCtl1 regUMCCH25_PerfMonCtl1; -typedef union UMCCH25_PerfMonCtl2 regUMCCH25_PerfMonCtl2; -typedef union UMCCH25_PerfMonCtl3 regUMCCH25_PerfMonCtl3; -typedef union UMCCH25_PerfMonCtl4 regUMCCH25_PerfMonCtl4; -typedef union UMCCH25_PerfMonCtl5 regUMCCH25_PerfMonCtl5; -typedef union UMCCH25_PerfMonCtlClk regUMCCH25_PerfMonCtlClk; -typedef union UMCCH25_PerfMonCtr1_Hi regUMCCH25_PerfMonCtr1_Hi; -typedef union UMCCH25_PerfMonCtr1_Lo regUMCCH25_PerfMonCtr1_Lo; -typedef union UMCCH25_PerfMonCtr2_Hi regUMCCH25_PerfMonCtr2_Hi; -typedef union UMCCH25_PerfMonCtr2_Lo regUMCCH25_PerfMonCtr2_Lo; -typedef union UMCCH25_PerfMonCtr3_Hi regUMCCH25_PerfMonCtr3_Hi; 
-typedef union UMCCH25_PerfMonCtr3_Lo regUMCCH25_PerfMonCtr3_Lo; -typedef union UMCCH25_PerfMonCtr4_Hi regUMCCH25_PerfMonCtr4_Hi; -typedef union UMCCH25_PerfMonCtr4_Lo regUMCCH25_PerfMonCtr4_Lo; -typedef union UMCCH25_PerfMonCtr5_Hi regUMCCH25_PerfMonCtr5_Hi; -typedef union UMCCH25_PerfMonCtr5_Lo regUMCCH25_PerfMonCtr5_Lo; -typedef union UMCCH25_PerfMonCtrClk_Hi regUMCCH25_PerfMonCtrClk_Hi; -typedef union UMCCH25_PerfMonCtrClk_Lo regUMCCH25_PerfMonCtrClk_Lo; -typedef union UMCCH26_PerfMonCtl1 regUMCCH26_PerfMonCtl1; -typedef union UMCCH26_PerfMonCtl2 regUMCCH26_PerfMonCtl2; -typedef union UMCCH26_PerfMonCtl3 regUMCCH26_PerfMonCtl3; -typedef union UMCCH26_PerfMonCtl4 regUMCCH26_PerfMonCtl4; -typedef union UMCCH26_PerfMonCtl5 regUMCCH26_PerfMonCtl5; -typedef union UMCCH26_PerfMonCtlClk regUMCCH26_PerfMonCtlClk; -typedef union UMCCH26_PerfMonCtr1_Hi regUMCCH26_PerfMonCtr1_Hi; -typedef union UMCCH26_PerfMonCtr1_Lo regUMCCH26_PerfMonCtr1_Lo; -typedef union UMCCH26_PerfMonCtr2_Hi regUMCCH26_PerfMonCtr2_Hi; -typedef union UMCCH26_PerfMonCtr2_Lo regUMCCH26_PerfMonCtr2_Lo; -typedef union UMCCH26_PerfMonCtr3_Hi regUMCCH26_PerfMonCtr3_Hi; -typedef union UMCCH26_PerfMonCtr3_Lo regUMCCH26_PerfMonCtr3_Lo; -typedef union UMCCH26_PerfMonCtr4_Hi regUMCCH26_PerfMonCtr4_Hi; -typedef union UMCCH26_PerfMonCtr4_Lo regUMCCH26_PerfMonCtr4_Lo; -typedef union UMCCH26_PerfMonCtr5_Hi regUMCCH26_PerfMonCtr5_Hi; -typedef union UMCCH26_PerfMonCtr5_Lo regUMCCH26_PerfMonCtr5_Lo; -typedef union UMCCH26_PerfMonCtrClk_Hi regUMCCH26_PerfMonCtrClk_Hi; -typedef union UMCCH26_PerfMonCtrClk_Lo regUMCCH26_PerfMonCtrClk_Lo; -typedef union UMCCH27_PerfMonCtl1 regUMCCH27_PerfMonCtl1; -typedef union UMCCH27_PerfMonCtl2 regUMCCH27_PerfMonCtl2; -typedef union UMCCH27_PerfMonCtl3 regUMCCH27_PerfMonCtl3; -typedef union UMCCH27_PerfMonCtl4 regUMCCH27_PerfMonCtl4; -typedef union UMCCH27_PerfMonCtl5 regUMCCH27_PerfMonCtl5; -typedef union UMCCH27_PerfMonCtlClk regUMCCH27_PerfMonCtlClk; -typedef union 
UMCCH27_PerfMonCtr1_Hi regUMCCH27_PerfMonCtr1_Hi; -typedef union UMCCH27_PerfMonCtr1_Lo regUMCCH27_PerfMonCtr1_Lo; -typedef union UMCCH27_PerfMonCtr2_Hi regUMCCH27_PerfMonCtr2_Hi; -typedef union UMCCH27_PerfMonCtr2_Lo regUMCCH27_PerfMonCtr2_Lo; -typedef union UMCCH27_PerfMonCtr3_Hi regUMCCH27_PerfMonCtr3_Hi; -typedef union UMCCH27_PerfMonCtr3_Lo regUMCCH27_PerfMonCtr3_Lo; -typedef union UMCCH27_PerfMonCtr4_Hi regUMCCH27_PerfMonCtr4_Hi; -typedef union UMCCH27_PerfMonCtr4_Lo regUMCCH27_PerfMonCtr4_Lo; -typedef union UMCCH27_PerfMonCtr5_Hi regUMCCH27_PerfMonCtr5_Hi; -typedef union UMCCH27_PerfMonCtr5_Lo regUMCCH27_PerfMonCtr5_Lo; -typedef union UMCCH27_PerfMonCtrClk_Hi regUMCCH27_PerfMonCtrClk_Hi; -typedef union UMCCH27_PerfMonCtrClk_Lo regUMCCH27_PerfMonCtrClk_Lo; -typedef union UMCCH28_PerfMonCtl1 regUMCCH28_PerfMonCtl1; -typedef union UMCCH28_PerfMonCtl2 regUMCCH28_PerfMonCtl2; -typedef union UMCCH28_PerfMonCtl3 regUMCCH28_PerfMonCtl3; -typedef union UMCCH28_PerfMonCtl4 regUMCCH28_PerfMonCtl4; -typedef union UMCCH28_PerfMonCtl5 regUMCCH28_PerfMonCtl5; -typedef union UMCCH28_PerfMonCtlClk regUMCCH28_PerfMonCtlClk; -typedef union UMCCH28_PerfMonCtr1_Hi regUMCCH28_PerfMonCtr1_Hi; -typedef union UMCCH28_PerfMonCtr1_Lo regUMCCH28_PerfMonCtr1_Lo; -typedef union UMCCH28_PerfMonCtr2_Hi regUMCCH28_PerfMonCtr2_Hi; -typedef union UMCCH28_PerfMonCtr2_Lo regUMCCH28_PerfMonCtr2_Lo; -typedef union UMCCH28_PerfMonCtr3_Hi regUMCCH28_PerfMonCtr3_Hi; -typedef union UMCCH28_PerfMonCtr3_Lo regUMCCH28_PerfMonCtr3_Lo; -typedef union UMCCH28_PerfMonCtr4_Hi regUMCCH28_PerfMonCtr4_Hi; -typedef union UMCCH28_PerfMonCtr4_Lo regUMCCH28_PerfMonCtr4_Lo; -typedef union UMCCH28_PerfMonCtr5_Hi regUMCCH28_PerfMonCtr5_Hi; -typedef union UMCCH28_PerfMonCtr5_Lo regUMCCH28_PerfMonCtr5_Lo; -typedef union UMCCH28_PerfMonCtrClk_Hi regUMCCH28_PerfMonCtrClk_Hi; -typedef union UMCCH28_PerfMonCtrClk_Lo regUMCCH28_PerfMonCtrClk_Lo; -typedef union UMCCH29_PerfMonCtl1 regUMCCH29_PerfMonCtl1; -typedef union 
UMCCH29_PerfMonCtl2 regUMCCH29_PerfMonCtl2; -typedef union UMCCH29_PerfMonCtl3 regUMCCH29_PerfMonCtl3; -typedef union UMCCH29_PerfMonCtl4 regUMCCH29_PerfMonCtl4; -typedef union UMCCH29_PerfMonCtl5 regUMCCH29_PerfMonCtl5; -typedef union UMCCH29_PerfMonCtlClk regUMCCH29_PerfMonCtlClk; -typedef union UMCCH29_PerfMonCtr1_Hi regUMCCH29_PerfMonCtr1_Hi; -typedef union UMCCH29_PerfMonCtr1_Lo regUMCCH29_PerfMonCtr1_Lo; -typedef union UMCCH29_PerfMonCtr2_Hi regUMCCH29_PerfMonCtr2_Hi; -typedef union UMCCH29_PerfMonCtr2_Lo regUMCCH29_PerfMonCtr2_Lo; -typedef union UMCCH29_PerfMonCtr3_Hi regUMCCH29_PerfMonCtr3_Hi; -typedef union UMCCH29_PerfMonCtr3_Lo regUMCCH29_PerfMonCtr3_Lo; -typedef union UMCCH29_PerfMonCtr4_Hi regUMCCH29_PerfMonCtr4_Hi; -typedef union UMCCH29_PerfMonCtr4_Lo regUMCCH29_PerfMonCtr4_Lo; -typedef union UMCCH29_PerfMonCtr5_Hi regUMCCH29_PerfMonCtr5_Hi; -typedef union UMCCH29_PerfMonCtr5_Lo regUMCCH29_PerfMonCtr5_Lo; -typedef union UMCCH29_PerfMonCtrClk_Hi regUMCCH29_PerfMonCtrClk_Hi; -typedef union UMCCH29_PerfMonCtrClk_Lo regUMCCH29_PerfMonCtrClk_Lo; -typedef union UMCCH30_PerfMonCtl1 regUMCCH30_PerfMonCtl1; -typedef union UMCCH30_PerfMonCtl2 regUMCCH30_PerfMonCtl2; -typedef union UMCCH30_PerfMonCtl3 regUMCCH30_PerfMonCtl3; -typedef union UMCCH30_PerfMonCtl4 regUMCCH30_PerfMonCtl4; -typedef union UMCCH30_PerfMonCtl5 regUMCCH30_PerfMonCtl5; -typedef union UMCCH30_PerfMonCtlClk regUMCCH30_PerfMonCtlClk; -typedef union UMCCH30_PerfMonCtr1_Hi regUMCCH30_PerfMonCtr1_Hi; -typedef union UMCCH30_PerfMonCtr1_Lo regUMCCH30_PerfMonCtr1_Lo; -typedef union UMCCH30_PerfMonCtr2_Hi regUMCCH30_PerfMonCtr2_Hi; -typedef union UMCCH30_PerfMonCtr2_Lo regUMCCH30_PerfMonCtr2_Lo; -typedef union UMCCH30_PerfMonCtr3_Hi regUMCCH30_PerfMonCtr3_Hi; -typedef union UMCCH30_PerfMonCtr3_Lo regUMCCH30_PerfMonCtr3_Lo; -typedef union UMCCH30_PerfMonCtr4_Hi regUMCCH30_PerfMonCtr4_Hi; -typedef union UMCCH30_PerfMonCtr4_Lo regUMCCH30_PerfMonCtr4_Lo; -typedef union UMCCH30_PerfMonCtr5_Hi 
regUMCCH30_PerfMonCtr5_Hi; -typedef union UMCCH30_PerfMonCtr5_Lo regUMCCH30_PerfMonCtr5_Lo; -typedef union UMCCH30_PerfMonCtrClk_Hi regUMCCH30_PerfMonCtrClk_Hi; -typedef union UMCCH30_PerfMonCtrClk_Lo regUMCCH30_PerfMonCtrClk_Lo; -typedef union UMCCH31_PerfMonCtl1 regUMCCH31_PerfMonCtl1; -typedef union UMCCH31_PerfMonCtl2 regUMCCH31_PerfMonCtl2; -typedef union UMCCH31_PerfMonCtl3 regUMCCH31_PerfMonCtl3; -typedef union UMCCH31_PerfMonCtl4 regUMCCH31_PerfMonCtl4; -typedef union UMCCH31_PerfMonCtl5 regUMCCH31_PerfMonCtl5; -typedef union UMCCH31_PerfMonCtlClk regUMCCH31_PerfMonCtlClk; -typedef union UMCCH31_PerfMonCtr1_Hi regUMCCH31_PerfMonCtr1_Hi; -typedef union UMCCH31_PerfMonCtr1_Lo regUMCCH31_PerfMonCtr1_Lo; -typedef union UMCCH31_PerfMonCtr2_Hi regUMCCH31_PerfMonCtr2_Hi; -typedef union UMCCH31_PerfMonCtr2_Lo regUMCCH31_PerfMonCtr2_Lo; -typedef union UMCCH31_PerfMonCtr3_Hi regUMCCH31_PerfMonCtr3_Hi; -typedef union UMCCH31_PerfMonCtr3_Lo regUMCCH31_PerfMonCtr3_Lo; -typedef union UMCCH31_PerfMonCtr4_Hi regUMCCH31_PerfMonCtr4_Hi; -typedef union UMCCH31_PerfMonCtr4_Lo regUMCCH31_PerfMonCtr4_Lo; -typedef union UMCCH31_PerfMonCtr5_Hi regUMCCH31_PerfMonCtr5_Hi; -typedef union UMCCH31_PerfMonCtr5_Lo regUMCCH31_PerfMonCtr5_Lo; -typedef union UMCCH31_PerfMonCtrClk_Hi regUMCCH31_PerfMonCtrClk_Hi; -typedef union UMCCH31_PerfMonCtrClk_Lo regUMCCH31_PerfMonCtrClk_Lo; -typedef union UTCL1_PERFCOUNTER0_HI regUTCL1_PERFCOUNTER0_HI; -typedef union UTCL1_PERFCOUNTER0_LO regUTCL1_PERFCOUNTER0_LO; -typedef union UTCL1_PERFCOUNTER0_SELECT regUTCL1_PERFCOUNTER0_SELECT; -typedef union UTCL1_PERFCOUNTER1_HI regUTCL1_PERFCOUNTER1_HI; -typedef union UTCL1_PERFCOUNTER1_LO regUTCL1_PERFCOUNTER1_LO; -typedef union UTCL1_PERFCOUNTER1_SELECT regUTCL1_PERFCOUNTER1_SELECT; -#if CHIP_HDR_NAVI31|| CHIP_HDR_NAVI32|| CHIP_HDR_NAVI33|| CHIP_HDR_PHOENIX1 -typedef union UTCL1_PERFCOUNTER2_HI regUTCL1_PERFCOUNTER2_HI; -typedef union UTCL1_PERFCOUNTER2_LO regUTCL1_PERFCOUNTER2_LO; -typedef union 
UTCL1_PERFCOUNTER2_SELECT regUTCL1_PERFCOUNTER2_SELECT; -typedef union UTCL1_PERFCOUNTER3_HI regUTCL1_PERFCOUNTER3_HI; -typedef union UTCL1_PERFCOUNTER3_LO regUTCL1_PERFCOUNTER3_LO; -typedef union UTCL1_PERFCOUNTER3_SELECT regUTCL1_PERFCOUNTER3_SELECT; -#endif -typedef union VGT_CACHE_INVALIDATION regVGT_CACHE_INVALIDATION; -typedef union VGT_CNTL_STATUS regVGT_CNTL_STATUS; -typedef union VGT_DISPATCH_DRAW_INDEX regVGT_DISPATCH_DRAW_INDEX; -typedef union VGT_DMA_BASE regVGT_DMA_BASE; -typedef union VGT_DMA_BASE_HI regVGT_DMA_BASE_HI; -typedef union VGT_DMA_CONTROL regVGT_DMA_CONTROL; -typedef union VGT_DMA_DATA_FIFO_DEPTH regVGT_DMA_DATA_FIFO_DEPTH; -typedef union VGT_DMA_EVENT_INITIATOR regVGT_DMA_EVENT_INITIATOR; -typedef union VGT_DMA_INDEX_TYPE regVGT_DMA_INDEX_TYPE; -typedef union VGT_DMA_LS_HS_CONFIG regVGT_DMA_LS_HS_CONFIG; -typedef union VGT_DMA_MAX_SIZE regVGT_DMA_MAX_SIZE; -typedef union VGT_DMA_NUM_INSTANCES regVGT_DMA_NUM_INSTANCES; -typedef union VGT_DMA_PRIMITIVE_TYPE regVGT_DMA_PRIMITIVE_TYPE; -typedef union VGT_DMA_REQ_FIFO_DEPTH regVGT_DMA_REQ_FIFO_DEPTH; -typedef union VGT_DMA_SIZE regVGT_DMA_SIZE; -typedef union VGT_DRAW_INITIATOR regVGT_DRAW_INITIATOR; -typedef union VGT_DRAW_INIT_FIFO_DEPTH regVGT_DRAW_INIT_FIFO_DEPTH; -typedef union VGT_DRAW_PAYLOAD_CNTL regVGT_DRAW_PAYLOAD_CNTL; -typedef union VGT_ENHANCE regVGT_ENHANCE; -typedef union VGT_ESGS_RING_ITEMSIZE regVGT_ESGS_RING_ITEMSIZE; -typedef union VGT_ESGS_RING_SIZE regVGT_ESGS_RING_SIZE; -typedef union VGT_ESGS_RING_SIZE_UMD regVGT_ESGS_RING_SIZE_UMD; -typedef union VGT_ES_PER_GS regVGT_ES_PER_GS; -typedef union VGT_EVENT_ADDRESS_REG regVGT_EVENT_ADDRESS_REG; -typedef union VGT_EVENT_INITIATOR regVGT_EVENT_INITIATOR; -typedef union VGT_FIFO_DEPTHS regVGT_FIFO_DEPTHS; -typedef union VGT_GROUP_DECR regVGT_GROUP_DECR; -typedef union VGT_GROUP_FIRST_DECR regVGT_GROUP_FIRST_DECR; -typedef union VGT_GROUP_PRIM_TYPE regVGT_GROUP_PRIM_TYPE; -typedef union VGT_GROUP_VECT_0_CNTL 
regVGT_GROUP_VECT_0_CNTL; -typedef union VGT_GROUP_VECT_0_FMT_CNTL regVGT_GROUP_VECT_0_FMT_CNTL; -typedef union VGT_GROUP_VECT_1_CNTL regVGT_GROUP_VECT_1_CNTL; -typedef union VGT_GROUP_VECT_1_FMT_CNTL regVGT_GROUP_VECT_1_FMT_CNTL; -typedef union VGT_GSVS_RING_ITEMSIZE regVGT_GSVS_RING_ITEMSIZE; -typedef union VGT_GSVS_RING_OFFSET_1 regVGT_GSVS_RING_OFFSET_1; -typedef union VGT_GSVS_RING_OFFSET_2 regVGT_GSVS_RING_OFFSET_2; -typedef union VGT_GSVS_RING_OFFSET_3 regVGT_GSVS_RING_OFFSET_3; -typedef union VGT_GSVS_RING_SIZE regVGT_GSVS_RING_SIZE; -typedef union VGT_GSVS_RING_SIZE_UMD regVGT_GSVS_RING_SIZE_UMD; -typedef union VGT_GS_INSTANCE_CNT regVGT_GS_INSTANCE_CNT; -typedef union VGT_GS_MAX_PRIMS_PER_SUBGROUP regVGT_GS_MAX_PRIMS_PER_SUBGROUP; -typedef union VGT_GS_MAX_VERT_OUT regVGT_GS_MAX_VERT_OUT; -typedef union VGT_GS_MAX_WAVE_ID regVGT_GS_MAX_WAVE_ID; -typedef union VGT_GS_MODE regVGT_GS_MODE; -typedef union VGT_GS_ONCHIP_CNTL regVGT_GS_ONCHIP_CNTL; -typedef union VGT_GS_OUT_PRIM_TYPE regVGT_GS_OUT_PRIM_TYPE; -typedef union VGT_GS_PER_ES regVGT_GS_PER_ES; -typedef union VGT_GS_PER_VS regVGT_GS_PER_VS; -typedef union VGT_GS_VERTEX_REUSE regVGT_GS_VERTEX_REUSE; -typedef union VGT_GS_VERT_ITEMSIZE regVGT_GS_VERT_ITEMSIZE; -typedef union VGT_GS_VERT_ITEMSIZE_1 regVGT_GS_VERT_ITEMSIZE_1; -typedef union VGT_GS_VERT_ITEMSIZE_2 regVGT_GS_VERT_ITEMSIZE_2; -typedef union VGT_GS_VERT_ITEMSIZE_3 regVGT_GS_VERT_ITEMSIZE_3; -typedef union VGT_HOS_CNTL regVGT_HOS_CNTL; -typedef union VGT_HOS_MAX_TESS_LEVEL regVGT_HOS_MAX_TESS_LEVEL; -typedef union VGT_HOS_MIN_TESS_LEVEL regVGT_HOS_MIN_TESS_LEVEL; -typedef union VGT_HOS_REUSE_DEPTH regVGT_HOS_REUSE_DEPTH; -typedef union VGT_HS_OFFCHIP_PARAM regVGT_HS_OFFCHIP_PARAM; -typedef union VGT_HS_OFFCHIP_PARAM_UMD regVGT_HS_OFFCHIP_PARAM_UMD; -typedef union VGT_IMMED_DATA regVGT_IMMED_DATA; -typedef union VGT_INDEX_TYPE regVGT_INDEX_TYPE; -typedef union VGT_INDX_OFFSET regVGT_INDX_OFFSET; -typedef union VGT_INSTANCE_BASE_ID 
regVGT_INSTANCE_BASE_ID; -typedef union VGT_INSTANCE_STEP_RATE_0 regVGT_INSTANCE_STEP_RATE_0; -typedef union VGT_INSTANCE_STEP_RATE_1 regVGT_INSTANCE_STEP_RATE_1; -typedef union VGT_LAST_COPY_STATE regVGT_LAST_COPY_STATE; -typedef union VGT_LS_HS_CONFIG regVGT_LS_HS_CONFIG; -typedef union VGT_MAX_VTX_INDX regVGT_MAX_VTX_INDX; -typedef union VGT_MC_LAT_CNTL regVGT_MC_LAT_CNTL; -typedef union VGT_MIN_VTX_INDX regVGT_MIN_VTX_INDX; -typedef union VGT_MULTI_PRIM_IB_RESET_EN regVGT_MULTI_PRIM_IB_RESET_EN; -typedef union VGT_MULTI_PRIM_IB_RESET_INDX regVGT_MULTI_PRIM_IB_RESET_INDX; -typedef union VGT_NUM_INDICES regVGT_NUM_INDICES; -typedef union VGT_NUM_INSTANCES regVGT_NUM_INSTANCES; -typedef union VGT_OUTPUT_PATH_CNTL regVGT_OUTPUT_PATH_CNTL; -typedef union VGT_OUT_DEALLOC_CNTL regVGT_OUT_DEALLOC_CNTL; -typedef union VGT_PERFCOUNTER0_HI regVGT_PERFCOUNTER0_HI; -typedef union VGT_PERFCOUNTER0_LO regVGT_PERFCOUNTER0_LO; -typedef union VGT_PERFCOUNTER0_SELECT regVGT_PERFCOUNTER0_SELECT; -typedef union VGT_PERFCOUNTER0_SELECT1 regVGT_PERFCOUNTER0_SELECT1; -typedef union VGT_PERFCOUNTER1_HI regVGT_PERFCOUNTER1_HI; -typedef union VGT_PERFCOUNTER1_LO regVGT_PERFCOUNTER1_LO; -typedef union VGT_PERFCOUNTER1_SELECT regVGT_PERFCOUNTER1_SELECT; -typedef union VGT_PERFCOUNTER1_SELECT1 regVGT_PERFCOUNTER1_SELECT1; -typedef union VGT_PERFCOUNTER2_HI regVGT_PERFCOUNTER2_HI; -typedef union VGT_PERFCOUNTER2_LO regVGT_PERFCOUNTER2_LO; -typedef union VGT_PERFCOUNTER2_SELECT regVGT_PERFCOUNTER2_SELECT; -typedef union VGT_PERFCOUNTER3_HI regVGT_PERFCOUNTER3_HI; -typedef union VGT_PERFCOUNTER3_LO regVGT_PERFCOUNTER3_LO; -typedef union VGT_PERFCOUNTER3_SELECT regVGT_PERFCOUNTER3_SELECT; -typedef union VGT_PERFCOUNTER_SEID_MASK regVGT_PERFCOUNTER_SEID_MASK; -typedef union VGT_PRIMITIVEID_EN regVGT_PRIMITIVEID_EN; -typedef union VGT_PRIMITIVEID_RESET regVGT_PRIMITIVEID_RESET; -typedef union VGT_PRIMITIVE_TYPE regVGT_PRIMITIVE_TYPE; -typedef union VGT_REUSE_OFF regVGT_REUSE_OFF; -typedef union 
VGT_SHADER_STAGES_EN regVGT_SHADER_STAGES_EN; -typedef union VGT_STRMOUT_BUFFER_CONFIG regVGT_STRMOUT_BUFFER_CONFIG; -typedef union VGT_STRMOUT_BUFFER_FILLED_SIZE_0 regVGT_STRMOUT_BUFFER_FILLED_SIZE_0; -typedef union VGT_STRMOUT_BUFFER_FILLED_SIZE_1 regVGT_STRMOUT_BUFFER_FILLED_SIZE_1; -typedef union VGT_STRMOUT_BUFFER_FILLED_SIZE_2 regVGT_STRMOUT_BUFFER_FILLED_SIZE_2; -typedef union VGT_STRMOUT_BUFFER_FILLED_SIZE_3 regVGT_STRMOUT_BUFFER_FILLED_SIZE_3; -typedef union VGT_STRMOUT_BUFFER_OFFSET_0 regVGT_STRMOUT_BUFFER_OFFSET_0; -typedef union VGT_STRMOUT_BUFFER_OFFSET_1 regVGT_STRMOUT_BUFFER_OFFSET_1; -typedef union VGT_STRMOUT_BUFFER_OFFSET_2 regVGT_STRMOUT_BUFFER_OFFSET_2; -typedef union VGT_STRMOUT_BUFFER_OFFSET_3 regVGT_STRMOUT_BUFFER_OFFSET_3; -typedef union VGT_STRMOUT_BUFFER_SIZE_0 regVGT_STRMOUT_BUFFER_SIZE_0; -typedef union VGT_STRMOUT_BUFFER_SIZE_1 regVGT_STRMOUT_BUFFER_SIZE_1; -typedef union VGT_STRMOUT_BUFFER_SIZE_2 regVGT_STRMOUT_BUFFER_SIZE_2; -typedef union VGT_STRMOUT_BUFFER_SIZE_3 regVGT_STRMOUT_BUFFER_SIZE_3; -typedef union VGT_STRMOUT_CONFIG regVGT_STRMOUT_CONFIG; -typedef union VGT_STRMOUT_DELAY regVGT_STRMOUT_DELAY; -typedef union VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE regVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE; -typedef union VGT_STRMOUT_DRAW_OPAQUE_OFFSET regVGT_STRMOUT_DRAW_OPAQUE_OFFSET; -typedef union VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE regVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE; -typedef union VGT_STRMOUT_VTX_STRIDE_0 regVGT_STRMOUT_VTX_STRIDE_0; -typedef union VGT_STRMOUT_VTX_STRIDE_1 regVGT_STRMOUT_VTX_STRIDE_1; -typedef union VGT_STRMOUT_VTX_STRIDE_2 regVGT_STRMOUT_VTX_STRIDE_2; -typedef union VGT_STRMOUT_VTX_STRIDE_3 regVGT_STRMOUT_VTX_STRIDE_3; -typedef union VGT_SYS_CONFIG regVGT_SYS_CONFIG; -typedef union VGT_TESS_DISTRIBUTION regVGT_TESS_DISTRIBUTION; -typedef union VGT_TF_MEMORY_BASE regVGT_TF_MEMORY_BASE; -typedef union VGT_TF_MEMORY_BASE_HI regVGT_TF_MEMORY_BASE_HI; -typedef union VGT_TF_MEMORY_BASE_HI_UMD 
regVGT_TF_MEMORY_BASE_HI_UMD; -typedef union VGT_TF_MEMORY_BASE_UMD regVGT_TF_MEMORY_BASE_UMD; -typedef union VGT_TF_PARAM regVGT_TF_PARAM; -typedef union VGT_TF_RING_SIZE regVGT_TF_RING_SIZE; -typedef union VGT_TF_RING_SIZE_UMD regVGT_TF_RING_SIZE_UMD; -typedef union VGT_VERTEX_REUSE_BLOCK_CNTL regVGT_VERTEX_REUSE_BLOCK_CNTL; -typedef union VGT_VS_MAX_WAVE_ID regVGT_VS_MAX_WAVE_ID; -typedef union VGT_VTX_CNT_EN regVGT_VTX_CNT_EN; -typedef union VGT_VTX_VECT_EJECT_REG regVGT_VTX_VECT_EJECT_REG; -typedef union WD_BUF_RESOURCE_1 regWD_BUF_RESOURCE_1; -typedef union WD_BUF_RESOURCE_2 regWD_BUF_RESOURCE_2; -typedef union WD_CNTL_SB_BUF_BASE regWD_CNTL_SB_BUF_BASE; -typedef union WD_CNTL_SB_BUF_BASE_HI regWD_CNTL_SB_BUF_BASE_HI; -typedef union WD_CNTL_STATUS regWD_CNTL_STATUS; -typedef union WD_ENHANCE regWD_ENHANCE; -typedef union WD_INDEX_BUF_BASE regWD_INDEX_BUF_BASE; -typedef union WD_INDEX_BUF_BASE_HI regWD_INDEX_BUF_BASE_HI; -typedef union WD_PERFCOUNTER0_HI regWD_PERFCOUNTER0_HI; -typedef union WD_PERFCOUNTER0_LO regWD_PERFCOUNTER0_LO; -typedef union WD_PERFCOUNTER0_SELECT regWD_PERFCOUNTER0_SELECT; -typedef union WD_PERFCOUNTER1_HI regWD_PERFCOUNTER1_HI; -typedef union WD_PERFCOUNTER1_LO regWD_PERFCOUNTER1_LO; -typedef union WD_PERFCOUNTER1_SELECT regWD_PERFCOUNTER1_SELECT; -typedef union WD_PERFCOUNTER2_HI regWD_PERFCOUNTER2_HI; -typedef union WD_PERFCOUNTER2_LO regWD_PERFCOUNTER2_LO; -typedef union WD_PERFCOUNTER2_SELECT regWD_PERFCOUNTER2_SELECT; -typedef union WD_PERFCOUNTER3_HI regWD_PERFCOUNTER3_HI; -typedef union WD_PERFCOUNTER3_LO regWD_PERFCOUNTER3_LO; -typedef union WD_PERFCOUNTER3_SELECT regWD_PERFCOUNTER3_SELECT; -typedef union WD_POS_BUF_BASE regWD_POS_BUF_BASE; -typedef union WD_POS_BUF_BASE_HI regWD_POS_BUF_BASE_HI; -typedef union WD_QOS regWD_QOS; -typedef union WD_UTCL1_CNTL regWD_UTCL1_CNTL; -typedef union WD_UTCL1_STATUS regWD_UTCL1_STATUS; -typedef union XDMA_SLV_FLIP_PENDING regXDMA_SLV_FLIP_PENDING; -} // inline namespace Chip -} // 
namespace Gfx9 -} // namespace Pal diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index 59d2f7911b..d06aeb6a37 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -73,15 +73,9 @@ class BuilderImpl : public BuilderDefs { // Get the PipelineState object. PipelineState *getPipelineState() const { return m_pipelineState; } - // Get whether the context we are building in supports DPP ROW_XMASK operations. - bool supportDppRowXmask() const; - // Get whether the context we are building in support the bpermute operation. bool supportWaveWideBPermute() const; - // Get whether the context we are building in supports permute lane DPP operations. - bool supportPermLaneDpp() const; - // Get whether the context we are building in supports permute lane 64 DPP operations. bool supportPermLane64Dpp() const; @@ -294,7 +288,15 @@ class BuilderImpl : public BuilderDefs { public: // Create a buffer descriptor. llvm::Value *CreateBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, - const llvm::Twine &instName = ""); + const llvm::Twine &instName = "") { + return createBufferDesc(descSet, binding, descIndex, flags, 0); + } + + // Create a strided buffer descriptor. + llvm::Value *CreateStridedBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, + unsigned stride, const llvm::Twine &instName = "") { + return createBufferDesc(descSet, binding, descIndex, flags, stride); + } // Create a get of the stride (in bytes) of a descriptor. 
llvm::Value *CreateGetDescStride(ResourceNodeType concreteType, ResourceNodeType abstractType, uint64_t descSet, @@ -308,7 +310,7 @@ class BuilderImpl : public BuilderDefs { llvm::Value *CreateLoadPushConstantsPtr(const llvm::Twine &instName = ""); // Calculate a buffer descriptor for an inline buffer - llvm::Value *buildInlineBufferDesc(llvm::Value *descPtr); + llvm::Value *buildInlineBufferDesc(llvm::Value *descPtr, unsigned stride); // Check whether vertex buffer descriptors are in a descriptor array binding instead of the VertexBufferTable. bool useVertexBufferDescArray(); @@ -328,8 +330,11 @@ class BuilderImpl : public BuilderDefs { llvm::Value *scalarizeIfUniform(llvm::Value *value, bool isNonUniform); // Build buffer compact descriptor - llvm::Value *buildBufferCompactDesc(llvm::Value *desc); + llvm::Value *buildBufferCompactDesc(llvm::Value *desc, unsigned stride); + // Create a buffer descriptor. + llvm::Value *createBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, + unsigned stride, const llvm::Twine &instName = ""); // ------------------------------------------------------------------------------------------------------------------- // Image operations @@ -396,6 +401,10 @@ class BuilderImpl : public BuilderDefs { llvm::Value *CreateImageGetLod(unsigned dim, unsigned flags, llvm::Value *imageDesc, llvm::Value *samplerDesc, llvm::Value *coord, const llvm::Twine &instName = ""); + // Create a query of the sample position of given sample id in an image. 
+ llvm::Value *CreateImageGetSamplePosition(unsigned dim, unsigned flags, llvm::Value *imageDesc, llvm::Value *sampleId, + const llvm::Twine &instName = ""); + // Create a ray intersect result with specified node in BVH buffer llvm::Value *CreateImageBvhIntersectRay(llvm::Value *nodePtr, llvm::Value *extent, llvm::Value *origin, llvm::Value *direction, llvm::Value *invDirection, llvm::Value *imageDesc, @@ -621,9 +630,6 @@ class BuilderImpl : public BuilderDefs { // Create a "kill". Only allowed in a fragment shader. llvm::Instruction *CreateKill(const llvm::Twine &instName = ""); - // Create a "debug break". - llvm::Instruction *CreateDebugBreak(const llvm::Twine &instName = ""); - // Create a "readclock". llvm::Instruction *CreateReadClock(bool realtime, const llvm::Twine &instName = ""); @@ -788,6 +794,7 @@ class BuilderImpl : public BuilderDefs { uint16_t getDsSwizzleQuadMode(uint8_t lane0, uint8_t lane1, uint8_t lane2, uint8_t lane3); protected: + llvm::Value *createGroupBallot(llvm::Value *const value, bool excludeHelperLanes); llvm::Value *createGroupBallot(llvm::Value *const value); llvm::Value *createFindMsb(llvm::Value *const mask); }; diff --git a/lgc/include/lgc/patch/LowerDesc.h b/lgc/include/lgc/patch/LowerDesc.h index 8d64effc24..f462f1a53f 100644 --- a/lgc/include/lgc/patch/LowerDesc.h +++ b/lgc/include/lgc/patch/LowerDesc.h @@ -40,7 +40,9 @@ namespace lgc { +class LoadBufferAddrOp; class LoadBufferDescOp; +class LoadStridedBufferDescOp; // ===================================================================================================================== // Pass to lower buffer descriptor loads. 
@@ -50,7 +52,9 @@ class LowerDesc : public llvm::PassInfoMixin { static llvm::StringRef name() { return "Lower buffer descriptor loads"; } private: + void visitLoadBufferAddr(LoadBufferAddrOp &op); void visitLoadBufferDesc(LoadBufferDescOp &op); + void visitLoadStridedBufferDesc(LoadStridedBufferDescOp &op); llvm::SmallVector m_toErase; PipelineState *m_pipelineState = nullptr; }; diff --git a/lgc/include/lgc/patch/Patch.h b/lgc/include/lgc/patch/Patch.h index e3cc5422b8..f78951b524 100644 --- a/lgc/include/lgc/patch/Patch.h +++ b/lgc/include/lgc/patch/Patch.h @@ -61,7 +61,7 @@ class Patch { // Register all the patching passes into the given pass builder static void registerPasses(llvm::PassBuilder &passBuilder); - static llvm::GlobalVariable *getLdsVariable(PipelineState *pipelineState, llvm::Module *module); + static llvm::Constant *getLdsVariable(PipelineState *pipelineState, llvm::Function *func, bool rtStack = false); protected: static void addOptimizationPasses(lgc::PassManager &passMgr, uint32_t optLevel); diff --git a/lgc/include/lgc/patch/PatchCopyShader.h b/lgc/include/lgc/patch/PatchCopyShader.h index 84b30252c9..f52784a094 100644 --- a/lgc/include/lgc/patch/PatchCopyShader.h +++ b/lgc/include/lgc/patch/PatchCopyShader.h @@ -64,7 +64,7 @@ class PatchCopyShader : public Patch, public llvm::PassInfoMixin> m_outputLocCompSizeMap[MaxGsStreams]; // The dword size of the output value at the new mapped m_importCalls; // List of "call" instructions to import inputs std::vector m_exportCalls; // List of "call" instructions to export outputs diff --git a/lgc/include/lgc/state/Abi.h b/lgc/include/lgc/state/Abi.h index 9c7f5a0fbe..4926198194 100644 --- a/lgc/include/lgc/state/Abi.h +++ b/lgc/include/lgc/state/Abi.h @@ -38,22 +38,22 @@ namespace lgc { -// Internal resource table's virtual bindings +// Internal resource table's offsets in dwords static const unsigned SiDrvTableScratchGfxSrdOffs = 0; -static const unsigned SiDrvTableScratchCsSrdOffs = 1; -static const 
unsigned SiDrvTableEsRingOutOffs = 2; -static const unsigned SiDrvTableGsRingInOffs = 3; -static const unsigned SiDrvTableGsRingOuT0Offs = 4; -static const unsigned SiDrvTableGsRingOuT1Offs = 5; -static const unsigned SiDrvTableGsRingOuT2Offs = 6; -static const unsigned SiDrvTableGsRingOuT3Offs = 7; -static const unsigned SiDrvTableVsRingInOffs = 8; -static const unsigned SiDrvTableTfBufferOffs = 9; -static const unsigned SiDrvTableHsBuffeR0Offs = 10; -static const unsigned SiDrvTableOffChipParamCache = 11; -static const unsigned SiDrvTableSamplepos = 12; -static const unsigned SiDrvTableTaskPayloadRingOffs = 13; -static const unsigned SiDrvTableTaskDrawDataRingOffs = 14; +static const unsigned SiDrvTableScratchCsSrdOffs = 4; +static const unsigned SiDrvTableEsRingOutOffs = 8; +static const unsigned SiDrvTableGsRingInOffs = 12; +static const unsigned SiDrvTableGsRingOuT0Offs = 16; +static const unsigned SiDrvTableGsRingOuT1Offs = 20; +static const unsigned SiDrvTableGsRingOuT2Offs = 24; +static const unsigned SiDrvTableGsRingOuT3Offs = 28; +static const unsigned SiDrvTableVsRingInOffs = 32; +static const unsigned SiDrvTableTfBufferOffs = 36; +static const unsigned SiDrvTableHsBuffeR0Offs = 40; +static const unsigned SiDrvTableOffChipParamCache = 44; +static const unsigned SiDrvTableSamplepos = 48; +static const unsigned SiDrvTableTaskPayloadRingOffs = 52; +static const unsigned SiDrvTableTaskDrawDataRingOffs = 56; static const unsigned SiStreamoutTableOffs = 0; diff --git a/lgc/include/lgc/state/AbiMetadata.h b/lgc/include/lgc/state/AbiMetadata.h index f50211a18a..f35ab2b638 100644 --- a/lgc/include/lgc/state/AbiMetadata.h +++ b/lgc/include/lgc/state/AbiMetadata.h @@ -42,9 +42,6 @@ namespace Util { namespace Abi { -constexpr unsigned PipelineMetadataMajorVersion = 2; // Pipeline Metadata Major Version -constexpr unsigned PipelineMetadataMinorVersion = 6; // Pipeline Metadata Minor Version - // TODO: Remove and update the version to [3,0] after switching to new 
register metadata layout constexpr unsigned PipelineMetadataMajorVersionNew = 3; // Pipeline Metadata Major Version constexpr unsigned PipelineMetadataMinorVersionNew = 0; // Pipeline Metadata Minor Version @@ -70,9 +67,7 @@ enum PipelineType : unsigned { // Hardware shader stage enum class HardwareStage : unsigned { - Ls = 0, // Hardware LS stage - Hs, // Hardware hS stage - Es, // Hardware ES stage + Hs = 0, // Hardware HS stage Gs, // Hardware GS stage Vs, // Hardware VS stage Ps, // Hardware PS stage @@ -83,9 +78,7 @@ enum class HardwareStage : unsigned { // Used to represent hardware shader stage. enum HardwareStageFlagBits : unsigned { - HwShaderLs = (1 << static_cast(HardwareStage::Ls)), HwShaderHs = (1 << static_cast(HardwareStage::Hs)), - HwShaderEs = (1 << static_cast(HardwareStage::Es)), HwShaderGs = (1 << static_cast(HardwareStage::Gs)), HwShaderVs = (1 << static_cast(HardwareStage::Vs)), HwShaderPs = (1 << static_cast(HardwareStage::Ps)), @@ -154,6 +147,7 @@ static constexpr char PsSampleMask[] = ".ps_sample_mask"; static constexpr char GraphicsRegisters[] = ".graphics_registers"; static constexpr char ComputeRegisters[] = ".compute_registers"; static constexpr char PsInputSemantic[] = ".ps_input_semantic"; +static constexpr char PsDummyExport[] = ".ps_dummy_export"; static constexpr char PrerasterOutputSemantic[] = ".preraster_output_semantic"; static constexpr char ShaderFunctions[] = ".shader_functions"; }; // namespace PipelineMetadataKey @@ -187,6 +181,7 @@ static constexpr char OffchipLdsEn[] = ".offchip_lds_en"; static constexpr char UserDataRegMap[] = ".user_data_reg_map"; static constexpr char ImageOp[] = ".image_op"; static constexpr char FrontendStackSize[] = ".frontend_stack_size"; +static constexpr char ShaderSpillThreshold[] = ".shader_spill_threshold"; }; // namespace HardwareStageMetadataKey namespace ShaderMetadataKey { @@ -626,7 +621,8 @@ static const char *const ApiStageNames[] = {".task", ".vertex", ".hull", ". 
".geometry", ".mesh", ".pixel", ".compute"}; // The names of hardware shader stages used in PAL metadata, in Util::Abi::HardwareStage order. -static const char *const HwStageNames[] = {".ls", ".hs", ".es", ".gs", ".vs", ".ps", ".cs"}; +static const char *const HwStageNames[static_cast(Util::Abi::HardwareStage::Count)] = {".hs", ".gs", ".vs", + ".ps", ".cs"}; // The name of the metadata node containing PAL metadata. This name is part of the interface from LGC into // the LLVM AMDGPU back-end when compiling for PAL ABI. @@ -635,27 +631,18 @@ static const char PalMetadataName[] = "amdgpu.pal.metadata.msgpack"; // PAL metadata SPI register numbers for the start of user data. // // Note on LS/HS confusion: -// <=GFX8 claims LS registers are from 0x2D4C and HS registers are from 0x2D0C -// GFX9 claims LS registers are from 0x2D0C, and the LS-HS merged shader uses them // GFX10 claims HS registers are from 0x2D0C, and the LS-HS merged shader uses them. // So here we call the registers from 0x2D0C "HS" and have the LS-HS merged shader using them, for -// consistency. That contradicts the GFX9 docs, but has the same effect. +// consistency. // -// First the ones that only apply up to GFX8 -constexpr unsigned int mmSPI_SHADER_USER_DATA_LS_0 = 0x2D4C; -// Up to GFX9 only -constexpr unsigned int mmSPI_SHADER_USER_DATA_ES_0 = 0x2CCC; // For GXF9, used for ES-GS merged shader -// Then the ones that apply to all hardware. 
constexpr unsigned int mmCOMPUTE_USER_DATA_0 = 0x2E40; -constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_0 = 0x2C8C; // For GFX10, used for ES-GS merged shader and NGG -constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_0 = 0x2D0C; // For GFX9+, Used for LS-HS merged shader +constexpr unsigned int mmSPI_SHADER_USER_DATA_GS_0 = 0x2C8C; // Used for ES-GS merged shader and NGG +constexpr unsigned int mmSPI_SHADER_USER_DATA_HS_0 = 0x2D0C; // Used for LS-HS merged shader constexpr unsigned int mmSPI_SHADER_USER_DATA_PS_0 = 0x2C0C; constexpr unsigned int mmSPI_SHADER_USER_DATA_VS_0 = 0x2C4C; // The RSRC1 registers. -constexpr unsigned mmSPI_SHADER_PGM_RSRC1_LS = 0x2D4A; constexpr unsigned mmSPI_SHADER_PGM_RSRC1_HS = 0x2D0A; -constexpr unsigned mmSPI_SHADER_PGM_RSRC1_ES = 0x2CCA; constexpr unsigned mmSPI_SHADER_PGM_RSRC1_GS = 0x2C8A; constexpr unsigned mmSPI_SHADER_PGM_RSRC1_VS = 0x2C4A; constexpr unsigned mmSPI_SHADER_PGM_RSRC1_PS = 0x2C0A; @@ -685,6 +672,8 @@ constexpr unsigned mmPA_SC_AA_CONFIG = 0xA2F8; constexpr unsigned mmVGT_GS_OUT_PRIM_TYPE = 0xA29B; constexpr unsigned mmVGT_GS_OUT_PRIM_TYPE_GFX11 = 0xC266; +constexpr unsigned mmSPI_SHADER_PGM_LO_GS = 0x2C88; + // Register bitfield layout. // General RSRC1 register, enough to get the VGPR and SGPR counts. 
@@ -823,4 +812,167 @@ union SPI_PS_INPUT_CNTL_0 { unsigned u32All; }; +typedef enum SPI_PNT_SPRITE_OVERRIDE { + SPI_PNT_SPRITE_SEL_0 = 0x00000000, + SPI_PNT_SPRITE_SEL_1 = 0x00000001, + SPI_PNT_SPRITE_SEL_S = 0x00000002, + SPI_PNT_SPRITE_SEL_T = 0x00000003, + SPI_PNT_SPRITE_SEL_NONE = 0x00000004, +} SPI_PNT_SPRITE_OVERRIDE; + +typedef enum SPI_SHADER_FORMAT { + SPI_SHADER_NONE = 0x00000000, + SPI_SHADER_1COMP = 0x00000001, + SPI_SHADER_2COMP = 0x00000002, + SPI_SHADER_4COMPRESS = 0x00000003, + SPI_SHADER_4COMP = 0x00000004, +} SPI_SHADER_FORMAT; + +typedef enum VGT_GS_CUT_MODE { + GS_CUT_1024__HASHWVS = 0x00000000, + GS_CUT_512__HASHWVS = 0x00000001, + GS_CUT_256__HASHWVS = 0x00000002, + GS_CUT_128__HASHWVS = 0x00000003, +} VGT_GS_CUT_MODE; + +typedef enum VGT_GS_MODE_TYPE { + GS_OFF = 0x00000000, + GS_SCENARIO_A = 0x00000001, + GS_SCENARIO_B = 0x00000002, + GS_SCENARIO_G = 0x00000003, + GS_SCENARIO_C = 0x00000004, + SPRITE_EN = 0x00000005, +} VGT_GS_MODE_TYPE; + +typedef enum VGT_GS_OUTPRIM_TYPE { + POINTLIST = 0x00000000, + LINESTRIP = 0x00000001, + TRISTRIP = 0x00000002, + RECTLIST__GFX09 = 0x00000003, + RECT_2D__GFX10PLUS = 0x00000003, + RECTLIST__GFX10PLUS = 0x00000004, +} VGT_GS_OUTPRIM_TYPE; + +typedef enum ZOrder { + LATE_Z = 0x00000000, + EARLY_Z_THEN_LATE_Z = 0x00000001, + RE_Z = 0x00000002, + EARLY_Z_THEN_RE_Z = 0x00000003, +} ZOrder; + +typedef enum VGT_STAGES_ES_EN { + ES_STAGE_OFF = 0x00000000, + ES_STAGE_DS = 0x00000001, + ES_STAGE_REAL = 0x00000002, + RESERVED_ES = 0x00000003, +} VGT_STAGES_ES_EN; + +typedef enum VGT_STAGES_GS_EN { + GS_STAGE_OFF = 0x00000000, + GS_STAGE_ON = 0x00000001, +} VGT_STAGES_GS_EN; + +typedef enum VGT_STAGES_HS_EN { + HS_STAGE_OFF = 0x00000000, + HS_STAGE_ON = 0x00000001, +} VGT_STAGES_HS_EN; + +typedef enum VGT_STAGES_LS_EN { + LS_STAGE_OFF = 0x00000000, + LS_STAGE_ON = 0x00000001, + CS_STAGE_ON = 0x00000002, + RESERVED_LS = 0x00000003, +} VGT_STAGES_LS_EN; +typedef enum VGT_STAGES_VS_EN { + VS_STAGE_REAL = 0x00000000, + 
VS_STAGE_DS = 0x00000001, + VS_STAGE_COPY_SHADER = 0x00000002, + RESERVED_VS = 0x00000003, +} VGT_STAGES_VS_EN; + +typedef enum ConservativeZExport { + EXPORT_ANY_Z = 0x00000000, + EXPORT_LESS_THAN_Z = 0x00000001, + EXPORT_GREATER_THAN_Z = 0x00000002, + EXPORT_RESERVED = 0x00000003, +} ConservativeZExport; + +typedef enum VGT_TESS_TYPE { + TESS_ISOLINE = 0x00000000, + TESS_TRIANGLE = 0x00000001, + TESS_QUAD = 0x00000002, +} VGT_TESS_TYPE; + +typedef enum VGT_TESS_PARTITION { + PART_INTEGER = 0x00000000, + PART_POW2 = 0x00000001, + PART_FRAC_ODD = 0x00000002, + PART_FRAC_EVEN = 0x00000003, +} VGT_TESS_PARTITION; + +typedef enum VGT_TESS_TOPOLOGY { + OUTPUT_POINT = 0x00000000, + OUTPUT_LINE = 0x00000001, + OUTPUT_TRIANGLE_CW = 0x00000002, + OUTPUT_TRIANGLE_CCW = 0x00000003, +} VGT_TESS_TOPOLOGY; + +typedef enum VGT_DIST_MODE { + NO_DIST = 0x00000000, + PATCHES = 0x00000001, + DONUTS = 0x00000002, + TRAPEZOIDS = 0x00000003, +} VGT_DIST_MODE; + +typedef enum SWIZZLE_MODE_ENUM { + SW_LINEAR = 0x00000000, + SW_256B_S = 0x00000001, + SW_256B_D = 0x00000002, + SW_256B_R = 0x00000003, + SW_4KB_Z = 0x00000004, + SW_4KB_S = 0x00000005, + SW_4KB_D = 0x00000006, + SW_4KB_R = 0x00000007, + SW_64KB_Z = 0x00000008, + SW_64KB_S = 0x00000009, + SW_64KB_D = 0x0000000a, + SW_64KB_R = 0x0000000b, + SW_64KB_Z_T = 0x00000010, + SW_64KB_S_T = 0x00000011, + SW_64KB_D_T = 0x00000012, + SW_64KB_R_T = 0x00000013, + SW_4KB_Z_X = 0x00000014, + SW_4KB_S_X = 0x00000015, + SW_4KB_D_X = 0x00000016, + SW_4KB_R_X = 0x00000017, + SW_64KB_Z_X = 0x00000018, + SW_64KB_S_X = 0x00000019, + SW_64KB_D_X = 0x0000001a, + SW_64KB_R_X = 0x0000001b, + SW_VAR_Z__GFX09 = 0x0000000c, + SW_VAR_S__GFX09 = 0x0000000d, + SW_VAR_D__GFX09 = 0x0000000e, + SW_VAR_R__GFX09 = 0x0000000f, + SW_VAR_S_X__GFX09 = 0x0000001d, + SW_VAR_D_X__GFX09 = 0x0000001e, + SW_VAR_Z_X__GFX09_10 = 0x0000001c, + SW_VAR_R_X__GFX09_10 = 0x0000001f, + SW_VAR_Z__GFX10CORE = 0x0000000c, + SW_VAR_S__GFX10CORE = 0x0000000d, + SW_VAR_D__GFX10CORE = 
0x0000000e, + SW_VAR_R__GFX10CORE = 0x0000000f, + SW_VAR_S_X__GFX10CORE = 0x0000001d, + SW_VAR_D_X__GFX10CORE = 0x0000001e, +#if CHIP_HDR_NAVI31 || CHIP_HDR_NAVI32 || CHIP_HDR_NAVI33 || CHIP_HDR_PHOENIX1 + SW_256KB_Z__GFX11 = 0x0000000c, + SW_256KB_S__GFX11 = 0x0000000d, + SW_256KB_D__GFX11 = 0x0000000e, + SW_256KB_R__GFX11 = 0x0000000f, + SW_256KB_Z_X__GFX11 = 0x0000001c, + SW_256KB_S_X__GFX11 = 0x0000001d, + SW_256KB_D_X__GFX11 = 0x0000001e, + SW_256KB_R_X__GFX11 = 0x0000001f, +#endif +} SWIZZLE_MODE_ENUM; + } // namespace lgc diff --git a/lgc/include/lgc/state/PalMetadata.h b/lgc/include/lgc/state/PalMetadata.h index 7c204f9a9e..5a1fa50938 100644 --- a/lgc/include/lgc/state/PalMetadata.h +++ b/lgc/include/lgc/state/PalMetadata.h @@ -87,9 +87,9 @@ struct FsInputMappings { class PalMetadata { public: // Constructors - PalMetadata(PipelineState *pipelineState, bool useRegisterFieldFormat); - PalMetadata(PipelineState *pipelineState, llvm::StringRef blob, bool useRegisterFieldFormat); - PalMetadata(PipelineState *pipelineState, llvm::Module *module, bool useRegisterFieldFormat); + PalMetadata(PipelineState *pipelineState); + PalMetadata(PipelineState *pipelineState, llvm::StringRef blob); + PalMetadata(PipelineState *pipelineState, llvm::Module *module); PalMetadata(const PalMetadata &) = delete; PalMetadata &operator=(const PalMetadata &) = delete; @@ -104,30 +104,14 @@ class PalMetadata { // Get the MsgPack document for explicit manipulation. Only ConfigBuilder* uses this. 
llvm::msgpack::Document *getDocument() { return m_document; } - // Set the PAL metadata SPI register for one user data entry - void setUserDataEntry(ShaderStageEnum stage, unsigned userDataIndex, unsigned userDataValue, unsigned dwordCount = 1); - void setUserDataEntry(ShaderStageEnum stage, unsigned userDataIndex, UserDataMapping userDataValue, - unsigned dwordCount = 1) { - setUserDataEntry(stage, userDataIndex, static_cast(userDataValue), dwordCount); - } - // Mark that the user data spill table is used at the given offset. The SpillThreshold PAL metadata entry is // set to the minimum of any call to this function in any shader. - void setUserDataSpillUsage(unsigned dwordOffset); + void setUserDataSpillUsage(unsigned dwordOffset, std::optional shaderStage); // Fix up registers. Any user data register that has one of the unlinked UserDataMapping values defined in // AbiUnlinked.h is fixed up by looking at pipeline state; And some dynamic states also need to be fixed. void fixUpRegisters(); - // Get a register value in PAL metadata. - unsigned getRegister(unsigned regNum); - - // Set a register value in PAL metadata. If the register has a value set already, it gets overwritten. - void setRegister(unsigned regNum, unsigned value); - - // Get the VS entry register info. Used by the linker to generate the fetch shader. - void getVsEntryRegInfo(VsEntryRegInfo &regInfo); - // Store the color export info in the PAL metadata void addColorExportInfo(llvm::ArrayRef exports); @@ -208,13 +192,13 @@ class PalMetadata { // Set userDataLimit to the given value void setUserDataLimit(unsigned value); + // Set Util::Abi::PipelineMetadataKey::PsDummyExport to true + void setPsDummyExport(); + private: // Initialize the PalMetadata object after reading in already-existing PAL metadata if any void initialize(); - // Get the first user data register number for the given shader stage. - unsigned getUserDataReg0(ShaderStageEnum stage); - // Get the llvm type that corresponds to tyName. 
Returns nullptr if no such type exists. llvm::Type *getLlvmType(llvm::StringRef tyName) const; @@ -236,30 +220,14 @@ class PalMetadata { // The maximum possible value for the spill threshold entry in the PAL metadata. static constexpr uint64_t MAX_SPILL_THRESHOLD = USHRT_MAX; - unsigned getUserDataCount(unsigned callingConv); - unsigned getCallingConventionForFirstHardwareShaderStage(std::string &hwStageName); - unsigned getFirstUserDataReg(unsigned callingConv); - unsigned getNumberOfSgprsBeforeUserData(unsigned conv); - unsigned getOffsetOfUserDataReg(std::map::iterator firstUserDataNode, - UserDataMapping userDataMapping); - unsigned getOffsetOfUserDataReg(llvm::msgpack::ArrayDocNode &userDataReg, UserDataMapping userDataRegMapping); - unsigned getNumberOfSgprsAfterUserData(unsigned callingConv); - unsigned getVertexIdOffset(unsigned callingConv); - unsigned getInstanceIdOffset(unsigned callingConv); - unsigned getVgprCount(unsigned callingConv); - bool isWave32(unsigned callingConv); - - PipelineState *m_pipelineState; // PipelineState - llvm::msgpack::Document *m_document; // The MsgPack document - llvm::msgpack::MapDocNode m_pipelineNode; // MsgPack map node for amdpal.pipelines[0] - llvm::msgpack::MapDocNode m_registers; // MsgPack map node for amdpal.pipelines[0].registers - llvm::msgpack::DocNode m_colorExports; // MsgPack map node for amdpal.pipelines[0].colorExports - // Mapping from ShaderStageEnum to SPI user data register start, allowing for merged shaders and NGG. 
- unsigned m_userDataRegMapping[ShaderStage::CountInternal] = {}; + PipelineState *m_pipelineState; // PipelineState + llvm::msgpack::Document *m_document; // The MsgPack document + llvm::msgpack::MapDocNode m_pipelineNode; // MsgPack map node for amdpal.pipelines[0] + llvm::msgpack::MapDocNode m_registers; // MsgPack map node for amdpal.pipelines[0].registers + llvm::msgpack::DocNode m_colorExports; // MsgPack map node for amdpal.pipelines[0].colorExports llvm::msgpack::DocNode *m_userDataLimit; // Maximum so far number of user data dwords used llvm::msgpack::DocNode *m_spillThreshold; // Minimum so far dword offset used in user data spill table llvm::SmallString<0> m_fsInputMappingsBlob; // Buffer for returning FS input mappings blob to LGC client - bool m_useRegisterFieldFormat; // Whether to use new PAL metadata in ELF }; } // namespace lgc diff --git a/lgc/include/lgc/state/PipelineState.h b/lgc/include/lgc/state/PipelineState.h index 74efa28d8b..6437c15512 100644 --- a/lgc/include/lgc/state/PipelineState.h +++ b/lgc/include/lgc/state/PipelineState.h @@ -317,6 +317,13 @@ class PipelineState final : public Pipeline { // Gets subgroup size for the specified shader stage unsigned getShaderSubgroupSize(ShaderStageEnum stage); + // Gets Util::Abi::PipelineType for pipeline + unsigned getAbiPipelineType(); + // Gets map of ShaderStageEnum to Util::Abi::HardwareStageFlagBits + const llvm::DenseMap *getAbiHwShaderMap(); + // Gets Util::Abi::HardwareStageFlagBits for the given shader stage + unsigned getShaderHwStageMask(ShaderStageEnum stage); + // Set the default wave size for the specified shader stage void setShaderDefaultWaveSize(ShaderStageEnum stage); @@ -332,21 +339,24 @@ class PipelineState final : public Pipeline { // Get NGG control settings NggControl *getNggControl() { return &m_nggControl; } + // Is NGG enabled + bool isNggEnabled() const; + // Checks if SW-emulated mesh pipeline statistics is needed bool needSwMeshPipelineStats() const; // Checks if row 
export for mesh shader is enabled or not bool enableMeshRowExport() const; - // Checks if register field value format is used or not - bool useRegisterFieldFormat() const { return m_registerFieldFormat; } - // Checks if SW-emulated stream-out should be enabled bool enableSwXfb(); // Gets resource usage of the specified shader stage ResourceUsage *getShaderResourceUsage(ShaderStageEnum shaderStage); + // Gets static LDS usage of the specified shader stage + unsigned getShaderStaticLdsUsage(ShaderStageEnum shaderStage, bool rtStack); + // Gets interface data of the specified shader stage InterfaceData *getShaderInterfaceData(ShaderStageEnum shaderStage); @@ -435,6 +445,13 @@ class PipelineState final : public Pipeline { // Get user data for a specific shader stage llvm::ArrayRef getUserDataMap(ShaderStageEnum shaderStage) const { return m_userDataMaps[shaderStage]; } + // Set spill_threshold for a specific shader stage + void setSpillThreshold(ShaderStageEnum shaderStage, unsigned spillThreshold) { + m_shaderSpillThreshold[shaderStage] = spillThreshold; + } + + // Get spill_threshold for a specific shader stage + unsigned getSpillThreshold(ShaderStageEnum shaderStage) { return m_shaderSpillThreshold[shaderStage]; } // ----------------------------------------------------------------------------------------------------------------- // Utility method templates to read and write IR metadata, used by PipelineState and ShaderModes @@ -577,6 +594,9 @@ class PipelineState final : public Pipeline { void recordGraphicsState(llvm::Module *module); void readGraphicsState(llvm::Module *module); + // ABI Shader Map + void buildAbiHwShaderMap(); + std::string m_lastError; // Error to be reported by getLastError() bool m_emitLgc = false; // Whether -emit-lgc is on // Whether generating pipeline or unlinked part-pipeline @@ -596,7 +616,6 @@ class PipelineState final : public Pipeline { bool m_gsOnChip = false; // Whether to use GS on-chip mode bool m_meshRowExport = false; // Enable 
mesh shader row export or not - bool m_registerFieldFormat = false; // Use register field format NggControl m_nggControl = {}; // NGG control settings ShaderModes m_shaderModes; // Shader modes for this pipeline unsigned m_deviceIndex = 0; // Device index @@ -615,11 +634,15 @@ class PipelineState final : public Pipeline { bool m_outputPackState[ShaderStage::GfxCount] = {}; // The output packable state per shader stage XfbStateMetadata m_xfbStateMetadata = {}; // Transform feedback state metadata llvm::SmallVector m_userDataMaps[ShaderStage::CountInternal]; // The user data per-shader + unsigned m_shaderSpillThreshold[ShaderStage::CountInternal] = {}; // The spillThreshold per-shader struct { float inner[2]; // default tessellation inner level float outer[4]; // default tessellation outer level } m_tessLevel; + + llvm::DenseMap m_abiHwShaderMap; + unsigned m_abiPipelineType = 0; }; // ===================================================================================================================== diff --git a/lgc/include/lgc/state/ResourceUsage.h b/lgc/include/lgc/state/ResourceUsage.h index 9ed5ecc076..b2668cb272 100644 --- a/lgc/include/lgc/state/ResourceUsage.h +++ b/lgc/include/lgc/state/ResourceUsage.h @@ -458,18 +458,14 @@ struct ResourceUsage { } gs; struct { - // Map from IDs of built-in outputs to locations of generic per-vertex outputs (used by vertex export to export - // built-in outputs to fragment shader) - std::map builtInExportLocs; - - // Map from IDs of per-primitive built-in outputs to locations of generic per-primitive outputs (used by vertex - // export to export built-in outputs to fragment shader) - std::map perPrimitiveBuiltInExportLocs; - - // Count of mapped location for generic outputs (excluding those special locations to which the built-ins - // are mapped) - unsigned genericOutputMapLocCount = 0; - unsigned perPrimitiveGenericOutputMapLocCount = 0; + // Map from built-in output IDs to their export slots (to fragment shader): + 
std::map vertexBuiltInExportSlots; + std::map primitiveBuiltInExportSlots; + + // Map from output locations to their number of components: > (including + // those special outputs to which built-ins are mapped) + std::map> vertexOutputComponents; + std::map> primitiveOutputComponents; } mesh; struct { diff --git a/lgc/include/lgc/state/TargetInfo.h b/lgc/include/lgc/state/TargetInfo.h index 555346582d..6a79787186 100644 --- a/lgc/include/lgc/state/TargetInfo.h +++ b/lgc/include/lgc/state/TargetInfo.h @@ -61,7 +61,6 @@ struct GpuProperty { unsigned gsOnChipDefaultPrimsPerSubgroup; // Default target number of primitives per subgroup for GS on-chip mode. unsigned gsOnChipDefaultLdsSizePerSubgroup; // Default value for the maximum LDS size per subgroup for unsigned gsOnChipMaxLdsSize; // Max LDS size used by GS on-chip mode (in dwords) - unsigned ldsSizeDwordGranularityShift; // Amount of bits used to shift the LDS_SIZE register field // TODO: Setup gsPrimBufferDepth from hardware config option, will be done in another change. unsigned gsPrimBufferDepth; // Comes from the hardware GPU__GC__GSPRIM_BUFF_DEPTH configuration option diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index cf5555bc30..f0df5c6a1f 100644 --- a/lgc/interface/lgc/Builder.h +++ b/lgc/interface/lgc/Builder.h @@ -313,6 +313,7 @@ class BuilderDefs : public BuilderCommon { ImageFlagEnforceReadFirstLaneSampler = 0x100, // Whether enabling readfirstlane on the sampler descriptor ImageFlagNotAliased = 0x200, // Whether the image is known not to alias any other memory object ImageFlagInvariant = 0x400, // Invariant load + ImageFlagSamplePatternOffset = 0x800, // Retrieving sample pattern offset in dwords for specified image }; // Address array indices for image sample and gather methods. 
Where an optional entry is missing (either @@ -431,8 +432,8 @@ class Builder : public BuilderDefs { // @param value2 : Input value 2 // @param fmfSource : Instruction to copy fast math flags from; nullptr to get from Builder // @param name : Name to give instruction - llvm::CallInst *CreateBinaryIntrinsic(llvm::Intrinsic::ID id, llvm::Value *value1, llvm::Value *value2, - llvm::Instruction *fmfSource = nullptr, const llvm::Twine &name = ""); + llvm::Value *CreateBinaryIntrinsic(llvm::Intrinsic::ID id, llvm::Value *value1, llvm::Value *value2, + llvm::Instruction *fmfSource = nullptr, const llvm::Twine &name = ""); // // @param id : Intrinsic ID @@ -1117,6 +1118,16 @@ class Builder : public BuilderDefs { llvm::Value *CreateImageGetLod(unsigned dim, unsigned flags, llvm::Value *imageDesc, llvm::Value *samplerDesc, llvm::Value *coord, const llvm::Twine &instName = ""); + // Create a query of the sample position of given sample id in an image. Returns an v2f32 value. + // + // @param dim : Image dimension + // @param flags : ImageFlag* flags + // @param imageDesc : Image descriptor or texel buffer descriptor + // @param sampleId : Sample ID + // @param instName : Name to give instruction(s) + llvm::Value *CreateImageGetSamplePosition(unsigned dim, unsigned flags, llvm::Value *imageDesc, llvm::Value *sampleId, + const llvm::Twine &instName = ""); + // Create a ray intersect result with specified node in BVH buffer. // nodePtr is the combination of BVH node offset type. // @@ -1376,11 +1387,6 @@ class Builder : public BuilderDefs { // @param instName : Name to give instruction(s) llvm::Instruction *CreateKill(const llvm::Twine &instName = ""); - // Create a "debug break". - // - // @param instName : Name to give instruction(s) - llvm::Instruction *CreateDebugBreak(const llvm::Twine &instName = ""); - // Create a "readclock". 
// // @param realtime : Whether to read real-time clock counter diff --git a/lgc/interface/lgc/BuilderCommon.h b/lgc/interface/lgc/BuilderCommon.h index 3f17329334..8fb34c6628 100644 --- a/lgc/interface/lgc/BuilderCommon.h +++ b/lgc/interface/lgc/BuilderCommon.h @@ -94,6 +94,11 @@ class BuilderCommon : public llvm_dialects::Builder { // @param ty : pointer type. llvm::Value *CreateAllocaAtFuncEntry(llvm::Type *ty); + // Create a "debug break". + // + // @param instName : Name to give instruction(s) + llvm::Instruction *CreateDebugBreak(const llvm::Twine &instName = ""); + // ----------------------------------------------------------------------------------------------------------------- // Cooperative matrix operation. diff --git a/lgc/interface/lgc/CommonDefs.h b/lgc/interface/lgc/CommonDefs.h index b2cb37fd0b..dd718b92d5 100644 --- a/lgc/interface/lgc/CommonDefs.h +++ b/lgc/interface/lgc/CommonDefs.h @@ -146,6 +146,15 @@ enum AddrSpace { ADDR_SPACE_MAX = ADDR_SPACE_BUFFER_STRIDED_POINTER }; +// Max number of threads per subgroup in NGG mode. +constexpr unsigned NggMaxThreadsPerSubgroup = 256; + +// Max number of waves per subgroup in NGG mode. +constexpr unsigned NggMaxWavesPerSubgroup = NggMaxThreadsPerSubgroup / 32; + +constexpr unsigned EsVertsOffchipGsOrTess = 250; +constexpr unsigned GsPrimsOffchipGsOrTess = 126; + } // namespace lgc namespace llvm { // Enable iteration over shader stages with `lgc::enumRange()`. 
diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index ea971d1527..e44fe74f5d 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -120,6 +120,17 @@ def BufferPtrDiffOp : LgcOp<"buffer.ptr.diff", [Memory<[]>, WillReturn]> { }]; } +def LoadBufferAddrOp : LgcOp<"load.buffer.addr", [Memory<[]>, WillReturn]> { + let arguments = (ins AttrI64:$desc_set, AttrI32:$binding, I32:$desc_index, + AttrI32:$flags); + let results = (outs I64:$result); + + let summary = "create a load of a buffer descriptor and convert to 64-bit address"; + let description = [{ + Return the i64 address. This works whether the descriptor is compact or not. + }]; +} + def LoadBufferDescOp : LgcOp<"load.buffer.desc", [Memory<[]>, WillReturn]> { let arguments = (ins AttrI64:$desc_set, AttrI32:$binding, I32:$desc_index, AttrI32:$flags); @@ -133,6 +144,22 @@ def LoadBufferDescOp : LgcOp<"load.buffer.desc", [Memory<[]>, WillReturn]> { }]; } +def LoadStridedBufferDescOp : LgcOp<"load.strided.buffer.desc", [Memory<[]>, WillReturn]> { + let arguments = (ins AttrI64:$desc_set, AttrI32:$binding, I32:$desc_index, AttrI32:$flags, AttrI32:$stride); + let results = (outs BufferStridedPointer:$result); + + let summary = "create a load of a strided buffer descriptor"; + let description = [{ + Return the strided buffer descriptor pointer. + + 'stride' If stride is 0, then the descriptor must be a full descriptor in memory (can't be inline or compact), and must be a strided descriptor. + Otherwise, the descriptor must either be a full strided descriptor in memory, in which case the descriptor's stride is used; + alternatively, the descriptor can be an inline or compact buffer, in which case stride is used for the stride of the returned buffer. + + `flags` must not contain `BufferFlagAddress` for this Op. 
+ }]; +} + def DebugPrintfOp : LgcOp<"debug.printf", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { let arguments = (ins BufferPointer:$buffer, ConstantPointer:$format, varargs:$args); let results = (outs); @@ -227,30 +254,19 @@ def GetMeshBuiltinInputOp : LgcOp<"get.mesh.builtin.input", [Memory<[]>, WillRet }]; } -def WriteMeshVertexOutputOp : LgcOp<"write.mesh.vertex.output", [Memory<[]>]> { - let arguments = (ins I32:$output_offset, I32:$vertex_index, value:$output_value); +def WriteMeshOutputOp : LgcOp<"write.mesh.output", [Memory<[]>]> { + let arguments = (ins AttrI1:$is_primitive, AttrI32:$location, I32:$location_offset, I32:$component_index, I32:$prim_or_vertex_index, value:$output_value); let results = (outs); - let summary = "Write mesh shader vertex outputs"; + let summary = "Write mesh shader primitive/vertex outputs"; let description = [{ - In the mesh shader, write mesh shader vertex outputs to LDS. + In the mesh shader, write mesh shader primitive/vertex outputs to LDS. - `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed vertex. - `vertex_index` is the vertex index specifying which vertex to write. - `output_value` is the output value to write. - }]; -} - -def WriteMeshPrimitiveOutputOp : LgcOp<"write.mesh.primitive.output", [Memory<[]>]> { - let arguments = (ins I32:$output_offset, I32:$primitive_index, value:$output_value); - let results = (outs); - - let summary = "Write mesh shader primitive outputs"; - let description = [{ - In the mesh shader, write mesh shader primitive outputs to LDS. - - `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed primitive. - `primitive_index` is the primitive index specifying which primitive to write. + `is_primitive` indicates if this write is for a primitive output or for a vertex output. + `location` is the start location of this output. 
+ `location_offset` is the relative location offset of this output, used by arrayed outputs. + `component_index` is the component index of this output when component addressing is involved. + `prim_or_vertex_index` is the primitive/vertex index specifying which primitive/vertex to write. `output_value` is the output value to write. }]; } @@ -554,3 +570,16 @@ def CooperativeRowAccExpandOp : LgcOp<"cooperative.rowacc.expand", [Memory<[]>, 'col_major' indicate how to expand the cooperative row acculumlator data by row or col. }]; } + +def LoadDriverTableEntryOp : LgcOp<"load.driver.table.entry", [Memory<[]>, WillReturn]> { + let arguments = (ins AttrI32:$offset); + let results = (outs value:$entry); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "load a driver table entry from specified offset."; + let description = [{ + The driver table is setup by PAL and contains global information such as descriptors for tessellation ring buffers, attributes-through-memory buffer, etc. + `offset` is the offset into the driver table, in unit of dwords. + }]; +} diff --git a/lgc/interface/lgc/ModuleBunch.h b/lgc/interface/lgc/ModuleBunch.h index cec8ece044..39966a80ac 100644 --- a/lgc/interface/lgc/ModuleBunch.h +++ b/lgc/interface/lgc/ModuleBunch.h @@ -80,6 +80,10 @@ class ModuleBunch { // Dump the module to stderr (for debugging). void dump() const; + bool IsNewDbgInfoFormat = false; + + void setIsNewDbgInfoFormat(bool UseNewFormat) { llvm_unreachable("Should never be called!"); } + private: SmallVector> Modules; }; diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 2fab40cd88..666c639922 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -127,7 +127,7 @@ static const char SampleShadingMetaName[] = "lgc.sample.shading"; // The front-end should zero-initialize a struct with "= {}" in case future changes add new fields. 
// Note: new fields must be added to the end of this structure to maintain test compatibility. union Options { - unsigned u32All[42]; + unsigned u32All[44]; struct { uint64_t hash[2]; // Pipeline hash to set in ELF PAL metadata unsigned includeDisassembly; // If set, the disassembly for all compiled shaders will be included @@ -188,10 +188,14 @@ union Options { unsigned rtTriCompressMode; // Ray tracing triangle compression mode bool useGpurt; // Whether GPURT is used bool reserved21; - bool disablePerCompFetch; // Disable per component fetch in uber fetch shader. - bool maskOffNullDescriptorTypeField; // If true, mask off the type field of word3 from a null descriptor. - bool vbAddressLowBitsKnown; // Use vertex buffer offset low bits from driver. - bool enableExtendedRobustBufferAccess; // Enable the extended robust buffer access + bool disablePerCompFetch; // Disable per component fetch in uber fetch shader. + bool maskOffNullDescriptorTypeField; // If true, mask off the type field of word3 from a null descriptor. + bool vbAddressLowBitsKnown; // Use vertex buffer offset low bits from driver. + bool enableExtendedRobustBufferAccess; // Enable the extended robust buffer access + bool sampleMaskExportOverridesAlphaToCoverage; // Whether to use sample mask export overriding alpha to coverage + bool disableSampleCoverageAdjust; // Disable the adjustment of sample coverage + bool forceFragColorDummyExport; // Force dummy export is added to fragment shader color export. 
+ unsigned reserved22; }; }; static_assert(sizeof(Options) == sizeof(Options::u32All)); @@ -675,6 +679,7 @@ struct FragmentShaderMode { ConservativeDepth conservativeDepth; ConservativeDepth conservativeStencilFront; ConservativeDepth conservativeStencilBack; + unsigned waveOpsRequireHelperLanes; }; // Kind of derivativeMode: diff --git a/lgc/interface/lgc/RayTracingLibrarySummary.h b/lgc/interface/lgc/RayTracingLibrarySummary.h index a1698c3988..9c65fd32ba 100644 --- a/lgc/interface/lgc/RayTracingLibrarySummary.h +++ b/lgc/interface/lgc/RayTracingLibrarySummary.h @@ -59,6 +59,10 @@ struct RayTracingLibrarySummary { // attributes (no AHS/IS/CHS). unsigned maxHitAttributeSize = 0; + // The maximum occurring number of payload registers in the pipeline, which will be taken into account for Traversal + // module so that it sees the correct maximum payload size of a pipeline. + unsigned maxUsedPayloadRegisterCount = 0; + // Whether a kernel entry function was built for this library. bool hasKernelEntry = false; diff --git a/lgc/patch/ConfigBuilderBase.cpp b/lgc/patch/ConfigBuilderBase.cpp index f699ec1f66..f2c4bb9c2e 100644 --- a/lgc/patch/ConfigBuilderBase.cpp +++ b/lgc/patch/ConfigBuilderBase.cpp @@ -66,13 +66,11 @@ ConfigBuilderBase::ConfigBuilderBase(Module *module, PipelineState *pipelineStat m_pipelineNode = m_document->getRoot().getMap(true)[Util::Abi::PalCodeObjectMetadataKey::Pipelines].getArray(true)[0].getMap(true); - if (m_pipelineState->useRegisterFieldFormat()) { - if (m_pipelineState->isGraphics()) - m_graphicsRegistersNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); + if (m_pipelineState->isGraphics()) + m_graphicsRegistersNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); - if (m_pipelineState->hasShaderStage(ShaderStage::Compute) || m_pipelineState->hasShaderStage(ShaderStage::Task)) - m_computeRegistersNode = 
m_pipelineNode[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap(true); - } + if (m_pipelineState->hasShaderStage(ShaderStage::Compute) || m_pipelineState->hasShaderStage(ShaderStage::Task)) + m_computeRegistersNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap(true); setApiName(pipelineState->getClient()); } diff --git a/lgc/patch/FragColorExport.cpp b/lgc/patch/FragColorExport.cpp index 4dedd2320c..36ac78c7c1 100644 --- a/lgc/patch/FragColorExport.cpp +++ b/lgc/patch/FragColorExport.cpp @@ -468,7 +468,7 @@ PreservedAnalyses LowerFragColorExport::run(Module &module, ModuleAnalysisManage } FragColorExport fragColorExport(m_pipelineState->getLgcContext()); - bool dummyExport = m_resUsage->builtInUsage.fs.discard; + bool dummyExport = m_resUsage->builtInUsage.fs.discard || m_pipelineState->getOptions().forceFragColorDummyExport; FragColorExport::Key key = FragColorExport::computeKey(m_info, m_pipelineState); fragColorExport.generateExportInstructions(m_info, m_exportValues, dummyExport, m_pipelineState->getPalMetadata(), builder, dynamicIsDualSource, key); @@ -1041,6 +1041,7 @@ void FragColorExport::generateExportInstructions(ArrayRef info, } if (!lastExport && dummyExport) { lastExport = FragColorExport::addDummyExport(builder); + palMetadata->setPsDummyExport(); finalExportFormats.push_back(EXP_FORMAT_32_R); } if (lastExport) @@ -1106,10 +1107,8 @@ Function *FragColorExport::generateNullFragmentEntryPoint(Module &module, Pipeli entryPoint->setDLLStorageClass(GlobalValue::DLLExportStorageClass); setShaderStage(entryPoint, ShaderStage::Fragment); entryPoint->setCallingConv(CallingConv::AMDGPU_PS); - if (pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) { - const unsigned waveSize = pipelineState->getShaderWaveSize(ShaderStage::Fragment); - entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size - } + const unsigned waveSize = 
pipelineState->getShaderWaveSize(ShaderStage::Fragment); + entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size return entryPoint; } diff --git a/lgc/patch/Gfx9Chip.cpp b/lgc/patch/Gfx9Chip.cpp deleted file mode 100644 index 0ac1173deb..0000000000 --- a/lgc/patch/Gfx9Chip.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file Gfx9Chip.cpp - * @brief LLPC header file: contains implementations for Gfx9 chips. - *********************************************************************************************************************** - */ -#include "Gfx9Chip.h" -#include "llvm/Support/ErrorHandling.h" - -#define DEBUG_TYPE "lgc-gfx9-chip" - -namespace lgc { - -namespace Gfx9 { -#include "chip/gfx9/gfx9_plus_merged_enum.h" -#include "chip/gfx9/gfx9_plus_merged_offset.h" - -// NOTE: This register only exist in GFX9 and GFX10, but its values are still useful for programming other registers in -// PAL, so always leave it in the ELF. -const unsigned mmVGT_GS_ONCHIP_CNTL = Pal::Gfx9::Gfx09_10::mmVGT_GS_ONCHIP_CNTL; - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -VsRegConfig::VsRegConfig(GfxIpVersion gfxIp) { - INIT_REG(SPI_SHADER_PGM_RSRC1_VS); - INIT_REG(SPI_SHADER_PGM_RSRC2_VS); - INIT_REG(SPI_SHADER_POS_FORMAT); - INIT_REG(SPI_VS_OUT_CONFIG); - INIT_REG(PA_CL_VS_OUT_CNTL); - INIT_REG(PA_CL_CLIP_CNTL); - INIT_REG(PA_CL_VTE_CNTL); - INIT_REG(PA_SU_VTX_CNTL); - INIT_REG(VGT_PRIMITIVEID_EN); - INIT_REG(VGT_REUSE_OFF); - - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_CONFIG); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_BUFFER_CONFIG); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_VTX_STRIDE_0); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_VTX_STRIDE_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_VTX_STRIDE_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_STRMOUT_VTX_STRIDE_3); - - INIT_REG_GFX10(gfxIp.major, SPI_SHADER_PGM_CHKSUM_VS); -} - -// 
===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -LsHsRegConfig::LsHsRegConfig(GfxIpVersion gfxIp) { - INIT_REG(SPI_SHADER_PGM_RSRC1_HS); - INIT_REG(SPI_SHADER_PGM_RSRC2_HS); - INIT_REG(SPI_SHADER_PGM_RSRC4_HS); - INIT_REG(VGT_LS_HS_CONFIG); - INIT_REG(VGT_HOS_MIN_TESS_LEVEL); - INIT_REG(VGT_HOS_MAX_TESS_LEVEL); - INIT_REG(VGT_TF_PARAM); - INIT_REG_APU09_1X_PLUS(gfxIp.major, SPI_SHADER_PGM_CHKSUM_HS); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -EsGsRegConfig::EsGsRegConfig(GfxIpVersion gfxIp) { - INIT_REG(SPI_SHADER_PGM_RSRC1_GS); - INIT_REG(SPI_SHADER_PGM_RSRC2_GS); - INIT_REG(SPI_SHADER_PGM_RSRC4_GS); - INIT_REG(VGT_GS_MAX_VERT_OUT); - INIT_REG(VGT_GS_INSTANCE_CNT); - INIT_REG(VGT_ESGS_RING_ITEMSIZE); - - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_PER_VS); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_ITEMSIZE); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_MODE); - - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_ONCHIP_CNTL); - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); - - INIT_REG_GFX9(gfxIp.major, VGT_GS_MAX_PRIMS_PER_SUBGROUP); - INIT_REG_GFX10_PLUS(gfxIp.major, GE_MAX_OUTPUT_PER_SUBGROUP); - INIT_REG_APU09_1X_PLUS(gfxIp.major, SPI_SHADER_PGM_CHKSUM_GS); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_NGG_SUBGRP_CNTL); - INIT_REG_GFX10_PLUS(gfxIp.major, SPI_SHADER_IDX_FORMAT); -} - -// 
===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PrimShaderRegConfig::PrimShaderRegConfig(GfxIpVersion gfxIp) { - INIT_REG(SPI_SHADER_PGM_RSRC1_GS); - INIT_REG(SPI_SHADER_PGM_RSRC2_GS); - INIT_REG(SPI_SHADER_PGM_RSRC4_GS); - INIT_REG(VGT_GS_MAX_VERT_OUT); - INIT_REG(VGT_GS_INSTANCE_CNT); - INIT_REG(VGT_ESGS_RING_ITEMSIZE); - INIT_REG(VGT_GS_ONCHIP_CNTL); - - // Special registers, having different register IDs - if (gfxIp.major == 10) { - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); - } else if (gfxIp.major == 11) { - INIT_REG_GFX11(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); - } else { - llvm_unreachable("Not implemented!"); - } - - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_PER_VS); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_ITEMSIZE); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_MODE); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_MAX_OUTPUT_PER_SUBGROUP); - INIT_REG_APU09_1X_PLUS(gfxIp.major, SPI_SHADER_PGM_CHKSUM_GS); - - INIT_REG(SPI_SHADER_POS_FORMAT); - INIT_REG(SPI_VS_OUT_CONFIG); - INIT_REG(PA_CL_VS_OUT_CNTL); - INIT_REG(PA_CL_CLIP_CNTL); - INIT_REG(PA_CL_VTE_CNTL); - INIT_REG(PA_SU_VTX_CNTL); - INIT_REG(VGT_PRIMITIVEID_EN); - INIT_REG(VGT_REUSE_OFF); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_NGG_SUBGRP_CNTL); - INIT_REG_GFX10_PLUS(gfxIp.major, SPI_SHADER_IDX_FORMAT); - - INIT_REG(SPI_SHADER_PGM_LO_GS); -} - -// ===================================================================================================================== -// 
Initializer -// -// @param gfxIp : Graphics IP version info -PsRegConfig::PsRegConfig(GfxIpVersion gfxIp) { - INIT_REG(SPI_SHADER_PGM_RSRC1_PS); - INIT_REG(SPI_SHADER_PGM_RSRC2_PS); - INIT_REG(SPI_SHADER_Z_FORMAT); - INIT_REG(SPI_SHADER_COL_FORMAT); - INIT_REG(SPI_BARYC_CNTL); - INIT_REG(SPI_PS_IN_CONTROL); - INIT_REG(SPI_PS_INPUT_ENA); - INIT_REG(SPI_PS_INPUT_ADDR); - INIT_REG(SPI_INTERP_CONTROL_0); - INIT_REG(PA_SC_MODE_CNTL_1); - INIT_REG(DB_SHADER_CONTROL); - INIT_REG(CB_SHADER_MASK); - INIT_REG(PA_SC_AA_CONFIG); - INIT_REG(PA_SC_SHADER_CONTROL); - INIT_REG_GFX10_PLUS(gfxIp.major, PA_STEREO_CNTL); - INIT_REG_GFX10_PLUS(gfxIp.major, GE_STEREO_CNTL); - INIT_REG_APU09_1X_PLUS(gfxIp.major, SPI_SHADER_PGM_CHKSUM_PS); - - INIT_REG_GFX10_PLUS(gfxIp.major, SPI_SHADER_PGM_RSRC4_PS); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_USER_VGPR_EN); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineVsFsRegConfig::PipelineVsFsRegConfig(GfxIpVersion gfxIp) : vsRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX9(gfxIp.major, IA_MULTI_VGT_PARAM); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_ONCHIP_CNTL); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineVsTsFsRegConfig::PipelineVsTsFsRegConfig(GfxIpVersion gfxIp) : lsHsRegs(gfxIp), vsRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX9(gfxIp.major, IA_MULTI_VGT_PARAM); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_ONCHIP_CNTL); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : 
Graphics IP version info -PipelineVsGsFsRegConfig::PipelineVsGsFsRegConfig(GfxIpVersion gfxIp) : esGsRegs(gfxIp), vsRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX9(gfxIp.major, IA_MULTI_VGT_PARAM); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// ===================================================================================================================== -// Initializer -PipelineVsTsGsFsRegConfig::PipelineVsTsGsFsRegConfig(GfxIpVersion gfxIp) - : lsHsRegs(gfxIp), esGsRegs(gfxIp), vsRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX9(gfxIp.major, IA_MULTI_VGT_PARAM); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineNggVsFsRegConfig::PipelineNggVsFsRegConfig(GfxIpVersion gfxIp) : primShaderRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineNggVsTsFsRegConfig::PipelineNggVsTsFsRegConfig(GfxIpVersion gfxIp) - : lsHsRegs(gfxIp), primShaderRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineNggVsGsFsRegConfig::PipelineNggVsGsFsRegConfig(GfxIpVersion gfxIp) : primShaderRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// 
===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineNggVsTsGsFsRegConfig::PipelineNggVsTsGsFsRegConfig(GfxIpVersion gfxIp) - : lsHsRegs(gfxIp), primShaderRegs(gfxIp), psRegs(gfxIp) { - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -CsRegConfig::CsRegConfig(GfxIpVersion gfxIp) { - INIT_REG(COMPUTE_PGM_RSRC1); - INIT_REG(COMPUTE_PGM_RSRC2); - INIT_REG(COMPUTE_NUM_THREAD_X); - INIT_REG(COMPUTE_NUM_THREAD_Y); - INIT_REG(COMPUTE_NUM_THREAD_Z); - INIT_REG_GFX10_PLUS(gfxIp.major, COMPUTE_SHADER_CHKSUM); - INIT_REG_GFX10_PLUS(gfxIp.major, COMPUTE_PGM_RSRC3); -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -MeshRegConfig::MeshRegConfig(GfxIpVersion gfxIp) { - assert(gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - INIT_REG_APU09_1X_PLUS(gfxIp.major, SPI_SHADER_PGM_CHKSUM_GS); - - INIT_REG(VGT_SHADER_STAGES_EN); - INIT_REG_GFX10(gfxIp.major, IA_MULTI_VGT_PARAM_PIPED); - - INIT_REG(SPI_SHADER_PGM_RSRC1_GS); - INIT_REG(SPI_SHADER_PGM_RSRC2_GS); - INIT_REG(SPI_SHADER_PGM_RSRC4_GS); - INIT_REG(VGT_GS_MAX_VERT_OUT); - INIT_REG(VGT_GS_INSTANCE_CNT); - INIT_REG(VGT_ESGS_RING_ITEMSIZE); - INIT_REG(VGT_GS_ONCHIP_CNTL); - - // Special registers, having different register IDs - if (gfxIp.major == 10) { - INIT_REG_GFX9_10(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); - } else if (gfxIp.major == 11) { - INIT_REG_GFX11(gfxIp.major, VGT_GS_OUT_PRIM_TYPE); - } else { - llvm_unreachable("Not implemented!"); - } - - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE); - 
INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_PER_VS); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_ITEMSIZE); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_VERT_ITEMSIZE_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_1); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_2); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GSVS_RING_OFFSET_3); - INIT_REG_HAS_HW_VS(gfxIp.major, VGT_GS_MODE); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_MAX_OUTPUT_PER_SUBGROUP); - - INIT_REG(SPI_SHADER_POS_FORMAT); - INIT_REG(SPI_VS_OUT_CONFIG); - INIT_REG(PA_CL_VS_OUT_CNTL); - INIT_REG(PA_CL_CLIP_CNTL); - INIT_REG(PA_CL_VTE_CNTL); - INIT_REG(PA_SU_VTX_CNTL); - INIT_REG(VGT_PRIMITIVEID_EN); - INIT_REG(VGT_REUSE_OFF); - INIT_REG(VGT_DRAW_PAYLOAD_CNTL); - - INIT_REG_GFX10_PLUS(gfxIp.major, GE_NGG_SUBGRP_CNTL); - INIT_REG_GFX10_PLUS(gfxIp.major, SPI_SHADER_IDX_FORMAT); - - if (gfxIp.major <= 11) { - INIT_REG_GFX11(gfxIp.major, SPI_SHADER_GS_MESHLET_DIM); - INIT_REG_GFX11(gfxIp.major, SPI_SHADER_GS_MESHLET_EXP_ALLOC); - } else { - llvm_unreachable("Not implemented!"); - } -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineMeshFsRegConfig::PipelineMeshFsRegConfig(GfxIpVersion gfxIp) : meshRegs(gfxIp), psRegs(gfxIp) { -} - -// ===================================================================================================================== -// Initializer -// -// @param gfxIp : Graphics IP version info -PipelineTaskMeshFsRegConfig::PipelineTaskMeshFsRegConfig(GfxIpVersion gfxIp) - : taskRegs(gfxIp), meshRegs(gfxIp), psRegs(gfxIp) { -} - -} // namespace Gfx9 - -} // namespace lgc diff --git a/lgc/patch/Gfx9Chip.h b/lgc/patch/Gfx9Chip.h deleted file mode 100644 index 6bb0b37419..0000000000 --- a/lgc/patch/Gfx9Chip.h +++ /dev/null @@ 
-1,605 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file Gfx9Chip.h - * @brief LLPC header file: contains various definitions for Gfx9 chips. 
- *********************************************************************************************************************** - */ -#pragma once - -#include "ConfigBuilderBase.h" -#include "lgc/state/TargetInfo.h" -#include -#include - -namespace lgc { - -namespace Gfx9 { - -#undef CS_ENABLE - -#include "chip/gfx9/gfx9_plus_merged_offset.h" -#include "chip/gfx9/gfx9_plus_merged_registers.h" -#include "chip/gfx9/gfx9_plus_merged_typedef.h" - -using namespace Pal::Gfx9::Chip; - -// ===================================================================================================================== -// Helper macros to operate registers - -// Defines fields: register ID (byte-based) and its value -#define DEF_REG(_reg) \ - unsigned _reg##_ID; \ - reg##_reg _reg##_VAL; - -// Defines GFX-dependent fields: register ID (byte-based) and its value -#define DEF_REG_ID(_reg) unsigned _reg##_ID; -#define DEF_REG_VAL(_reg) \ - struct { \ - unsigned u32All; \ - } _reg##_VAL; - -// Initializes register ID and its value -#define INIT_REG(_reg) \ - { \ - _reg##_ID = mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } - -// Initializes register to invalid ID and value -#define INIT_REG_TO_INVALID(_reg) \ - { \ - _reg##_ID = InvalidMetadataKey; \ - _reg##_VAL.u32All = InvalidMetadataValue; \ - } - -// Initializes GFX-dependent register ID and its value -// GFX10 plus -#define INIT_REG_GFX10_PLUS(_gfx, _reg) \ - { \ - if (_gfx >= 10) { \ - _reg##_ID = Gfx10Plus::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// Apu09.1x plus -#define INIT_REG_APU09_1X_PLUS(_gfx, _reg) \ - { \ - if (_gfx >= 10) { \ - _reg##_ID = Apu09_1xPlus::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// GFX9 only -#define INIT_REG_GFX9(_gfx, _reg) \ - { \ - if (_gfx == 9) { \ - _reg##_ID = Gfx09::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// GFX10 only -#define 
INIT_REG_GFX10(_gfx, _reg) \ - { \ - if (_gfx == 10) { \ - _reg##_ID = Pal::Gfx9::Chip::Gfx10::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// GFX11 only -#define INIT_REG_GFX11(_gfx, _reg) \ - { \ - if (_gfx == 11) { \ - _reg##_ID = Pal::Gfx9::Chip::Gfx11::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// GFX9-GFX10 only -#define INIT_REG_GFX9_10(_gfx, _reg) \ - { \ - if (_gfx == 9 || _gfx == 10) { \ - _reg##_ID = Gfx09_10::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// HasHwVs only -#define INIT_REG_HAS_HW_VS(_gfx, _reg) \ - { \ - if (_gfx == 9 || _gfx == 10) { \ - _reg##_ID = HasHwVs::mm##_reg; \ - _reg##_VAL.u32All = 0; \ - } else { \ - INIT_REG_TO_INVALID(_reg); \ - } \ - } - -// Case label for switch, set register value -#define CASE_SET_REG(_stage, _reg, _val) \ - case (mm##_reg * 4): { \ - (_stage)->_reg##_VAL.u32All = (_val); \ - break; \ - } - -// Gets register value -#define GET_REG(_stage, _reg) ((_stage)->_reg##_VAL.u32All) - -// Sets register value -#define SET_REG(_stage, _reg, _val) (_stage)->_reg##_VAL.u32All = (_val); - -// Invalidate register, set it to invalid ID and value -#define INVALIDATE_REG(_stage, _reg) \ - { \ - (_stage)->_reg##_ID = InvalidMetadataKey; \ - (_stage)->_reg##_VAL.u32All = InvalidMetadataValue; \ - } - -// Gets register field value -#define GET_REG_FIELD(_stage, _reg, _field) ((_stage)->_reg##_VAL.bits._field) - -// Sets register field value -#define SET_REG_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.bits._field = (_val); - -// Sets register most field value -#define SET_REG_MOST_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.most._field = (_val); - -// Gets register core field value -#define GET_REG_CORE_FIELD(_stage, _reg, _field) ((_stage)->_reg##_VAL.core._field) - -// Sets register core field value -#define SET_REG_CORE_FIELD(_stage, _reg, _field, 
_val) (_stage)->_reg##_VAL.core._field = (_val); - -// Gets GFX-dependent register field value -#define GET_REG_GFX9_FIELD(_stage, _reg, _field) ((_stage)->_reg##_VAL.gfx09._field) -#define GET_REG_GFX10_FIELD(_stage, _reg, _field) ((_stage)->_reg##_VAL.gfx10._field) - -// Sets GFX-dependent register field value -#define SET_REG_GFX9_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx09._field = (_val); -#define SET_REG_GFX09_1X_PLUS_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx09_1xPlus._field = (_val); -#define SET_REG_GFX10_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx10._field = (_val); -#define SET_REG_GFX9_10_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx09_10._field = (_val); -#define SET_REG_GFX10_PLUS_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx10Plus._field = (_val); -#define SET_REG_GFX10_1_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx101._field = (_val); -#define SET_REG_GFX10_3_PLUS_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx103Plus._field = (_val); -#define SET_REG_GFX10_3_PLUS_EXCLUSIVE_FIELD(_stage, _reg, _field, _val) \ - (_stage)->_reg##_VAL.gfx103PlusExclusive._field = (_val); -#define SET_REG_GFX10_4_PLUS_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx104Plus._field = (_val); -#define SET_REG_GFX11_FIELD(_stage, _reg, _field, _val) (_stage)->_reg##_VAL.gfx11._field = (_val); - -// Preferred number of GS primitives per ES thread. -constexpr unsigned GsPrimsPerEsThread = 256; - -// Preferred number of GS threads per VS thread. -constexpr unsigned GsThreadsPerVsThread = 2; - -// Preferred number of HS threads per subgroup. -constexpr unsigned MaxHsThreadsPerSubgroup = 256; - -// Preferred number of GS threads per subgroup. -constexpr unsigned MaxGsThreadsPerSubgroup = 256; - -// Max number of threads per subgroup in NGG mode. -constexpr unsigned NggMaxThreadsPerSubgroup = 256; - -// Max number of waves per subgroup in NGG mode. 
-constexpr unsigned NggMaxWavesPerSubgroup = NggMaxThreadsPerSubgroup / 32; - -// Max size of primitives per subgroup for adjacency primitives or when GS instancing is used. This restriction is -// applicable only when onchip GS is used. -constexpr unsigned OnChipGsMaxPrimPerSubgroup = 255; -constexpr unsigned OnChipGsMaxPrimPerSubgroupAdj = 127; -constexpr unsigned OnChipGsMaxEsVertsPerSubgroup = 255; - -// Default value for the maximum LDS size per GS subgroup, in dword's. -constexpr unsigned DefaultLdsSizePerSubgroup = 8192; - -constexpr unsigned EsVertsOffchipGsOrTess = 250; -constexpr unsigned GsPrimsOffchipGsOrTess = 126; - -// The register headers don't specify an enum for the values of VGT_GS_MODE.ONCHIP. -enum VGT_GS_MODE_ONCHIP_TYPE : unsigned { - VGT_GS_MODE_ONCHIP_OFF = 1, - VGT_GS_MODE_ONCHIP_ON = 3, -}; - -// The register headers don't specify an enum for the values of PA_STEREO_CNTL.STEREO_MODE. -enum StereoMode : unsigned { - SHADER_STEREO_X = 0, - STATE_STEREO_X = 1, - SHADER_STEREO_XYZW = 2, -}; - -namespace Gfx10 { -constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_GS = Apu09_1xPlus::mmSPI_SHADER_PGM_CHKSUM_GS; -constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_HS = Apu09_1xPlus::mmSPI_SHADER_PGM_CHKSUM_HS; -constexpr unsigned int mmSPI_SHADER_PGM_CHKSUM_PS = Apu09_1xPlus::mmSPI_SHADER_PGM_CHKSUM_PS; -}; // namespace Gfx10 - -// ===================================================================================================================== -// Represents configuration of static registers relevant to hardware vertex shader. 
-struct VsRegConfig { - DEF_REG(SPI_SHADER_PGM_RSRC1_VS); - DEF_REG(SPI_SHADER_PGM_RSRC2_VS); - DEF_REG(SPI_SHADER_POS_FORMAT); - DEF_REG(SPI_VS_OUT_CONFIG); - DEF_REG(PA_CL_VS_OUT_CNTL); - DEF_REG(PA_CL_CLIP_CNTL); - DEF_REG(PA_CL_VTE_CNTL); - DEF_REG(PA_SU_VTX_CNTL); - DEF_REG(VGT_PRIMITIVEID_EN); - DEF_REG(VGT_REUSE_OFF); - DEF_REG(VGT_STRMOUT_CONFIG); - DEF_REG(VGT_STRMOUT_BUFFER_CONFIG); - DEF_REG(VGT_STRMOUT_VTX_STRIDE_0); - DEF_REG(VGT_STRMOUT_VTX_STRIDE_1); - DEF_REG(VGT_STRMOUT_VTX_STRIDE_2); - DEF_REG(VGT_STRMOUT_VTX_STRIDE_3); - DEF_REG(SPI_SHADER_PGM_CHKSUM_VS); - - VsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of static registers relevant to hardware local-hull merged shader. -struct LsHsRegConfig { - DEF_REG(SPI_SHADER_PGM_RSRC1_HS); - DEF_REG(SPI_SHADER_PGM_RSRC2_HS); - DEF_REG(SPI_SHADER_PGM_RSRC4_HS); - DEF_REG(VGT_LS_HS_CONFIG); - DEF_REG(VGT_HOS_MIN_TESS_LEVEL); - DEF_REG(VGT_HOS_MAX_TESS_LEVEL); - DEF_REG(VGT_TF_PARAM); - DEF_REG(SPI_SHADER_PGM_CHKSUM_HS); - - LsHsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of static registers relevant to hardware export-geometry merged shader. 
-struct EsGsRegConfig { - DEF_REG(SPI_SHADER_PGM_RSRC1_GS); - DEF_REG(SPI_SHADER_PGM_RSRC2_GS); - DEF_REG(SPI_SHADER_PGM_RSRC4_GS); - DEF_REG(VGT_GS_MAX_VERT_OUT); - DEF_REG(VGT_GS_ONCHIP_CNTL); - DEF_REG(VGT_GS_VERT_ITEMSIZE); - DEF_REG(VGT_GS_INSTANCE_CNT); - DEF_REG(VGT_GS_PER_VS); - DEF_REG(VGT_GS_OUT_PRIM_TYPE); - DEF_REG(VGT_GSVS_RING_ITEMSIZE); - DEF_REG(VGT_GS_VERT_ITEMSIZE_1); - DEF_REG(VGT_GS_VERT_ITEMSIZE_2); - DEF_REG(VGT_GS_VERT_ITEMSIZE_3); - DEF_REG(VGT_GSVS_RING_OFFSET_1); - DEF_REG(VGT_GSVS_RING_OFFSET_2); - DEF_REG(VGT_GSVS_RING_OFFSET_3); - DEF_REG(VGT_GS_MODE); - DEF_REG(VGT_ESGS_RING_ITEMSIZE); - DEF_REG(VGT_GS_MAX_PRIMS_PER_SUBGROUP); - DEF_REG(GE_MAX_OUTPUT_PER_SUBGROUP); - DEF_REG(SPI_SHADER_PGM_CHKSUM_GS); - - DEF_REG(GE_NGG_SUBGRP_CNTL); - DEF_REG(SPI_SHADER_IDX_FORMAT); - - EsGsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of static registers relevant to hardware primitive shader (NGG). 
-struct PrimShaderRegConfig { - DEF_REG(SPI_SHADER_PGM_RSRC1_GS); - DEF_REG(SPI_SHADER_PGM_RSRC2_GS); - DEF_REG(SPI_SHADER_PGM_RSRC4_GS); - DEF_REG(VGT_GS_MAX_VERT_OUT); - DEF_REG(VGT_GS_ONCHIP_CNTL); - DEF_REG(VGT_GS_VERT_ITEMSIZE); - DEF_REG(VGT_GS_INSTANCE_CNT); - DEF_REG(VGT_GS_PER_VS); - DEF_REG(VGT_GS_OUT_PRIM_TYPE); - DEF_REG(VGT_GSVS_RING_ITEMSIZE); - DEF_REG(VGT_GS_VERT_ITEMSIZE_1); - DEF_REG(VGT_GS_VERT_ITEMSIZE_2); - DEF_REG(VGT_GS_VERT_ITEMSIZE_3); - DEF_REG(VGT_GSVS_RING_OFFSET_1); - DEF_REG(VGT_GSVS_RING_OFFSET_2); - DEF_REG(VGT_GSVS_RING_OFFSET_3); - DEF_REG(VGT_GS_MODE); - DEF_REG(VGT_ESGS_RING_ITEMSIZE); - DEF_REG(GE_MAX_OUTPUT_PER_SUBGROUP); - DEF_REG(SPI_SHADER_PGM_CHKSUM_GS); - - DEF_REG(SPI_SHADER_POS_FORMAT); - DEF_REG(SPI_VS_OUT_CONFIG); - DEF_REG(PA_CL_VS_OUT_CNTL); - DEF_REG(PA_CL_CLIP_CNTL); - DEF_REG(PA_CL_VTE_CNTL); - DEF_REG(PA_SU_VTX_CNTL); - DEF_REG(VGT_PRIMITIVEID_EN); - DEF_REG(VGT_REUSE_OFF); - - DEF_REG(GE_NGG_SUBGRP_CNTL); - DEF_REG(SPI_SHADER_IDX_FORMAT); - - DEF_REG(SPI_SHADER_PGM_LO_GS); - - PrimShaderRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of static registers relevant to hardware pixel shader. 
-struct PsRegConfig { - DEF_REG(SPI_SHADER_PGM_RSRC1_PS); - DEF_REG(SPI_SHADER_PGM_RSRC2_PS); - DEF_REG(SPI_SHADER_PGM_RSRC4_PS); - DEF_REG(SPI_SHADER_Z_FORMAT); - DEF_REG(SPI_SHADER_COL_FORMAT); - DEF_REG(SPI_BARYC_CNTL); - DEF_REG(SPI_PS_IN_CONTROL); - DEF_REG(SPI_PS_INPUT_ENA); - DEF_REG(SPI_PS_INPUT_ADDR); - DEF_REG(SPI_INTERP_CONTROL_0); - DEF_REG(PA_SC_MODE_CNTL_1); - DEF_REG(DB_SHADER_CONTROL); - DEF_REG(CB_SHADER_MASK); - DEF_REG(PA_SC_AA_CONFIG); - DEF_REG(PA_SC_SHADER_CONTROL); - DEF_REG(PA_STEREO_CNTL); - DEF_REG(GE_STEREO_CNTL); - DEF_REG(GE_USER_VGPR_EN); - DEF_REG(SPI_SHADER_PGM_CHKSUM_PS); - - PsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (VS-FS). -struct PipelineVsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - VsRegConfig vsRegs; // VS -> hardware VS - PsRegConfig psRegs; // FS -> hardware PS - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(VGT_GS_ONCHIP_CNTL); - DEF_REG(IA_MULTI_VGT_PARAM); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineVsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (VS-TS-FS). 
-struct PipelineVsTsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - LsHsRegConfig lsHsRegs; // VS-TCS -> hardware LS-HS - VsRegConfig vsRegs; // TES -> hardware VS - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - DEF_REG(VGT_GS_ONCHIP_CNTL); - - PipelineVsTsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (VS-GS-FS). -struct PipelineVsGsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - EsGsRegConfig esGsRegs; // VS-GS -> hardware ES-GS - VsRegConfig vsRegs; // Copy shader -> hardware VS - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineVsGsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (VS-TS-GS-FS). -struct PipelineVsTsGsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - LsHsRegConfig lsHsRegs; // VS-TCS -> hardware LS-HS - EsGsRegConfig esGsRegs; // TES-GS -> hardware ES-GS - VsRegConfig vsRegs; // Copy shader -> hardware VS - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineVsTsGsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (NGG, VS-FS). 
-struct PipelineNggVsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - PrimShaderRegConfig primShaderRegs; // VS -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineNggVsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (NGG, VS-TS-FS). -struct PipelineNggVsTsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - LsHsRegConfig lsHsRegs; // VS-TCS -> hardware LS-HS - PrimShaderRegConfig primShaderRegs; // TES -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineNggVsTsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (NGG, VS-GS-FS). -struct PipelineNggVsGsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - PrimShaderRegConfig primShaderRegs; // VS-GS -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineNggVsGsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (NGG, VS-TS-GS-FS). 
-struct PipelineNggVsTsGsFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - LsHsRegConfig lsHsRegs; // VS-TCS -> hardware LS-HS - PrimShaderRegConfig primShaderRegs; // TES-GS -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - PipelineNggVsTsGsFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to compute shader. -struct CsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - DEF_REG(COMPUTE_PGM_RSRC1); - DEF_REG(COMPUTE_PGM_RSRC2); - DEF_REG(COMPUTE_NUM_THREAD_X); - DEF_REG(COMPUTE_NUM_THREAD_Y); - DEF_REG(COMPUTE_NUM_THREAD_Z); - DEF_REG(COMPUTE_PGM_RSRC3); - DEF_REG(COMPUTE_SHADER_CHKSUM); - - CsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to mesh shader. 
-struct MeshRegConfig { - DEF_REG(SPI_SHADER_PGM_CHKSUM_GS); - - DEF_REG(VGT_SHADER_STAGES_EN); - DEF_REG(IA_MULTI_VGT_PARAM_PIPED); - - DEF_REG(SPI_SHADER_PGM_RSRC1_GS); - DEF_REG(SPI_SHADER_PGM_RSRC2_GS); - DEF_REG(SPI_SHADER_PGM_RSRC4_GS); - DEF_REG(VGT_GS_MAX_VERT_OUT); - DEF_REG(VGT_GS_ONCHIP_CNTL); - DEF_REG(VGT_GS_VERT_ITEMSIZE); - DEF_REG(VGT_GS_INSTANCE_CNT); - DEF_REG(VGT_GS_PER_VS); - DEF_REG(VGT_GS_OUT_PRIM_TYPE); - DEF_REG(VGT_GSVS_RING_ITEMSIZE); - DEF_REG(VGT_GS_VERT_ITEMSIZE_1); - DEF_REG(VGT_GS_VERT_ITEMSIZE_2); - DEF_REG(VGT_GS_VERT_ITEMSIZE_3); - DEF_REG(VGT_GSVS_RING_OFFSET_1); - DEF_REG(VGT_GSVS_RING_OFFSET_2); - DEF_REG(VGT_GSVS_RING_OFFSET_3); - DEF_REG(VGT_GS_MODE); - DEF_REG(VGT_ESGS_RING_ITEMSIZE); - DEF_REG(GE_MAX_OUTPUT_PER_SUBGROUP); - - DEF_REG(SPI_SHADER_POS_FORMAT); - DEF_REG(SPI_VS_OUT_CONFIG); - DEF_REG(PA_CL_VS_OUT_CNTL); - DEF_REG(PA_CL_CLIP_CNTL); - DEF_REG(PA_CL_VTE_CNTL); - DEF_REG(PA_SU_VTX_CNTL); - DEF_REG(VGT_PRIMITIVEID_EN); - DEF_REG(VGT_REUSE_OFF); - DEF_REG(VGT_DRAW_PAYLOAD_CNTL); - - DEF_REG(GE_NGG_SUBGRP_CNTL); - DEF_REG(SPI_SHADER_IDX_FORMAT); - - DEF_REG(SPI_SHADER_GS_MESHLET_DIM); - DEF_REG(SPI_SHADER_GS_MESHLET_EXP_ALLOC); - - MeshRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (Mesh-FS). -struct PipelineMeshFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - MeshRegConfig meshRegs; // Mesh -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - - PipelineMeshFsRegConfig(GfxIpVersion gfxIp); -}; - -// ===================================================================================================================== -// Represents configuration of registers relevant to graphics pipeline (Task-Mesh-FS). 
-struct PipelineTaskMeshFsRegConfig { - static constexpr bool ContainsPalAbiMetadataOnly = true; - - CsRegConfig taskRegs; // Task -> hardware CS - MeshRegConfig meshRegs; // Mesh -> hardware primitive shader (NGG, ES-GS) - PsRegConfig psRegs; // FS -> hardware PS - - PipelineTaskMeshFsRegConfig(GfxIpVersion gfxIp); -}; - -} // namespace Gfx9 - -} // namespace lgc diff --git a/lgc/patch/Gfx9ConfigBuilder.cpp b/lgc/patch/Gfx9ConfigBuilder.cpp deleted file mode 100644 index 30d48f08ba..0000000000 --- a/lgc/patch/Gfx9ConfigBuilder.cpp +++ /dev/null @@ -1,2290 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file Gfx9ConfigBuilder.cpp - * @brief LLPC header file: contains implementation of class lgc::Gfx9::ConfigBuilder. - *********************************************************************************************************************** - */ -#include "Gfx9ConfigBuilder.h" -#include "lgc/BuiltIns.h" -#include "lgc/state/PipelineState.h" -#include "lgc/state/TargetInfo.h" - -#define DEBUG_TYPE "lgc-gfx9-config-builder" - -using namespace llvm; - -namespace lgc { - -namespace Gfx9 { - -#include "chip/gfx9/gfx9_plus_merged_enum.h" -#include "chip/gfx9/gfx9_plus_merged_offset.h" - -using namespace Pal::Gfx9::Chip; - -// ===================================================================================================================== -// Builds PAL metadata for pipeline. 
-void ConfigBuilder::buildPalMetadata() { - if (!m_pipelineState->isGraphics()) { - buildPipelineCsRegConfig(); - } else { - const bool hasTs = (m_hasTcs || m_hasTes); - const bool enableNgg = m_pipelineState->getNggControl()->enableNgg; - - if (m_hasTask) { - // Task-Mesh-FS pipeline - buildPipelineTaskMeshFsConfig(); - } else if (m_hasMesh) { - // Mesh-FS pipeline - buildPipelineMeshFsConfig(); - } else if (!hasTs && !m_hasGs) { - // VS-FS pipeline or FS-only shader (part-pipeline compilation) - if (m_gfxIp.major >= 10 && enableNgg) - buildPipelineNggVsFsRegConfig(); - else - buildPipelineVsFsRegConfig(); - } else if (hasTs && !m_hasGs) { - // VS-TS-FS pipeline - if (m_gfxIp.major >= 10 && enableNgg) - buildPipelineNggVsTsFsRegConfig(); - else - buildPipelineVsTsFsRegConfig(); - } else if (!hasTs && m_hasGs) { - // VS-GS-FS pipeline - if (m_gfxIp.major >= 10 && enableNgg) - buildPipelineNggVsGsFsRegConfig(); - else - buildPipelineVsGsFsRegConfig(); - } else { - // VS-TS-GS-FS pipeline - if (m_gfxIp.major >= 10 && enableNgg) - buildPipelineNggVsTsGsFsRegConfig(); - else - buildPipelineVsTsGsFsRegConfig(); - } - } - - writePalMetadata(); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (VS-FS) or FS-only shader compilation -void ConfigBuilder::buildPipelineVsFsRegConfig() { - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - PipelineVsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex)) { - setPipelineType(Util::Abi::PipelineType::VsPs); - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderVs); - buildVsRegConfig(ShaderStage::Vertex, &config); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_REAL); - const auto 
waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Vertex); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, VS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Vs, waveSize); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - SET_REG(&config, VGT_GS_ONCHIP_CNTL, 0); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_MOST_FIELD(&config.vsRegs, SPI_SHADER_PGM_CHKSUM_VS, CHECKSUM, checksum); - } - - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - // When non-patch primitives are used without tessellation enabled, PRIMGROUP_SIZE must be at least 4, and must be - // even if there are more than 2 shader engines on the GPU. - unsigned primGroupSize = 128; - unsigned numShaderEngines = m_pipelineState->getTargetInfo().getGpuProperty().numShaderEngines; - if (numShaderEngines > 2) - primGroupSize = alignTo(primGroupSize, 2); - - iaMultiVgtParam.bits.PRIMGROUP_SIZE = primGroupSize - 1; - - if (m_gfxIp.major == 10) { - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - } else { - SET_REG(&config, IA_MULTI_VGT_PARAM, iaMultiVgtParam.u32All); - } - } else { - invalidRegConfig(config.vsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (VS-TS-FS). 
-void ConfigBuilder::buildPipelineVsTsFsRegConfig() { - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - PipelineVsTsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessControl, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessEval, Util::Abi::HwShaderVs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::Tess); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - // In GEN_TWO the only supported mode is fully distributed tessellation. The programming model is expected - // to set VGT_SHADER_STAGES_EN.DYNAMIC_HS=1 and VGT_TF_PARAM.NUM_DS_WAVES_PER_SIMD=0 - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, DYNAMIC_HS, true); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex) || - m_pipelineState->hasShaderStage(ShaderStage::TessControl)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasTcs = m_pipelineState->hasShaderStage(ShaderStage::TessControl); - - buildLsHsRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasTcs ? 
ShaderStage::TessControl : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::TessControl); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.lsHsRegs, SPI_SHADER_PGM_CHKSUM_HS, CHECKSUM, checksum); - } - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, HS_EN, HS_STAGE_ON); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, LS_EN, LS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessControl); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, HS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Hs, waveSize); - } else { - invalidRegConfig(config.lsHsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::TessEval)) { - buildVsRegConfig(ShaderStage::TessEval, &config); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_DS); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessEval); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, VS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Vs, waveSize); - - unsigned checksum = setShaderHash(ShaderStage::TessEval); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG(&config.vsRegs, SPI_SHADER_PGM_CHKSUM_VS, checksum); - } - } else { - invalidRegConfig(config.vsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const auto &tcsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; - const auto &tesBuiltInUsage = 
m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->builtInUsage.tes; - - if (tcsBuiltInUsage.primitiveId || tesBuiltInUsage.primitiveId) { - iaMultiVgtParam.bits.PARTIAL_ES_WAVE_ON = true; - iaMultiVgtParam.bits.SWITCH_ON_EOI = true; - } - - if (m_gfxIp.major == 10) { - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - - SET_REG_FIELD(&config, VGT_GS_ONCHIP_CNTL, ES_VERTS_PER_SUBGRP, EsVertsOffchipGsOrTess); - SET_REG_FIELD(&config, VGT_GS_ONCHIP_CNTL, GS_PRIMS_PER_SUBGRP, GsPrimsOffchipGsOrTess); - SET_REG_FIELD(&config, VGT_GS_ONCHIP_CNTL, GS_INST_PRIMS_IN_SUBGRP, GsPrimsOffchipGsOrTess); - } else { - SET_REG(&config, IA_MULTI_VGT_PARAM, iaMultiVgtParam.u32All); - } - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (VS-GS-FS). -void ConfigBuilder::buildPipelineVsGsFsRegConfig() { - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - PipelineVsGsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Geometry, Util::Abi::HwShaderGs | Util::Abi::HwShaderVs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::Gs); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex) || m_pipelineState->hasShaderStage(ShaderStage::Geometry)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - - buildEsGsRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasGs ? 
ShaderStage::Geometry : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::Geometry); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.esGsRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_REAL); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, GS_EN, GS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - } else { - invalidRegConfig(config.esGsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::CopyShader)) { - buildVsRegConfig(ShaderStage::CopyShader, &config); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_COPY_SHADER); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::CopyShader); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, VS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Vs, waveSize); - } else { - invalidRegConfig(config.vsRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const unsigned primGroupSize = 128; - iaMultiVgtParam.bits.PRIMGROUP_SIZE = primGroupSize - 1; - - if (m_gfxIp.major == 10) { - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - } else { - SET_REG(&config, IA_MULTI_VGT_PARAM, iaMultiVgtParam.u32All); - } - - appendConfig(config); -} - -// ===================================================================================================================== 
-// Builds register configuration for graphics pipeline (VS-TS-GS-FS). -void ConfigBuilder::buildPipelineVsTsGsFsRegConfig() { - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - PipelineVsTsGsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessControl, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessEval, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Geometry, Util::Abi::HwShaderGs | Util::Abi::HwShaderVs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::GsTess); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex) || - m_pipelineState->hasShaderStage(ShaderStage::TessControl)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasTcs = m_pipelineState->hasShaderStage(ShaderStage::TessControl); - - buildLsHsRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasTcs ? ShaderStage::TessControl : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::TessControl); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.lsHsRegs, SPI_SHADER_PGM_CHKSUM_HS, CHECKSUM, checksum); - } - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, HS_EN, HS_STAGE_ON); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, LS_EN, LS_STAGE_ON); - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessControl); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, HS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Hs, waveSize); - - // In GEN_TWO the only supported mode is fully distributed tessellation. 
The programming model is expected - // to set VGT_SHADER_STAGES_EN.DYNAMIC_HS=1 and VGT_TF_PARAM.NUM_DS_WAVES_PER_SIMD=0 - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, DYNAMIC_HS, true); - } else { - invalidRegConfig(config.lsHsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::TessEval) || - m_pipelineState->hasShaderStage(ShaderStage::Geometry)) { - const bool hasTes = m_pipelineState->hasShaderStage(ShaderStage::TessEval); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - - buildEsGsRegConfig(hasTes ? ShaderStage::TessEval : ShaderStage::Invalid, - hasGs ? ShaderStage::Geometry : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::TessEval); - checksum = checksum ^ setShaderHash(ShaderStage::Geometry); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.esGsRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_DS); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, GS_EN, GS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - } else { - invalidRegConfig(config.esGsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::CopyShader)) { - buildVsRegConfig(ShaderStage::CopyShader, &config); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_COPY_SHADER); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::CopyShader); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, VS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major == 10) - 
setWaveFrontSize(Util::Abi::HardwareStage::Vs, waveSize); - } else { - invalidRegConfig(config.vsRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const auto &tcsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; - const auto &tesBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->builtInUsage.tes; - const auto &gsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->builtInUsage.gs; - - // With tessellation, SWITCH_ON_EOI and PARTIAL_ES_WAVE_ON must be set if primitive ID is used by either the TCS, TES, - // or GS. - if (tcsBuiltInUsage.primitiveId || tesBuiltInUsage.primitiveId || gsBuiltInUsage.primitiveIdIn) - iaMultiVgtParam.bits.SWITCH_ON_EOI = true; - - if (m_gfxIp.major == 10) { - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - } else { - SET_REG(&config, IA_MULTI_VGT_PARAM, iaMultiVgtParam.u32All); - } - - // Set up VGT_TF_PARAM - setupVgtTfParam(&config.lsHsRegs); - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (NGG, VS-FS) or FS-only shader compilation -void ConfigBuilder::buildPipelineNggVsFsRegConfig() { - assert(m_gfxIp.major >= 10); // Must be GFX10 or above - - PipelineNggVsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex)) { - const auto nggControl = m_pipelineState->getNggControl(); - assert(nggControl->enableNgg); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderGs); - setPipelineType(Util::Abi::PipelineType::Ngg); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_EN, true); - if (m_gfxIp.major <= 11) { - 
SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_EN, nggControl->passthroughMode); - } - if (m_gfxIp.major >= 11) { - SET_REG_GFX10_4_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_NO_MSG, - nggControl->passthroughMode && !m_pipelineState->enableSwXfb()); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, NGG_WAVE_ID_EN, m_pipelineState->enableSwXfb()); - } - - buildPrimShaderRegConfig(ShaderStage::Vertex, ShaderStage::Invalid, &config); - - if (m_gfxIp.major <= 11) { - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_REAL); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_REAL); - } - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Vertex); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.primShaderRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - // When non-patch primitives are used without tessellation enabled, PRIMGROUP_SIZE must be at least 4, - // and must be even if there are more than 2 shader engines on the GPU. 
- unsigned primGroupSize = 128; - unsigned numShaderEngines = m_pipelineState->getTargetInfo().getGpuProperty().numShaderEngines; - if (numShaderEngines > 2) - primGroupSize = alignTo(primGroupSize, 2); - - iaMultiVgtParam.bits.PRIMGROUP_SIZE = primGroupSize - 1; - - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - } else { - invalidRegConfig(config.primShaderRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (NGG, VS-TS-FS). -void ConfigBuilder::buildPipelineNggVsTsFsRegConfig() { - assert(m_gfxIp.major >= 10); // Must be GFX10 or above - - const auto nggControl = m_pipelineState->getNggControl(); - assert(nggControl->enableNgg); - - PipelineNggVsTsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessControl, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessEval, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::NggTess); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_EN, true); - if (m_gfxIp.major <= 11) { - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_EN, nggControl->passthroughMode); - } - if (m_gfxIp.major >= 11) { - SET_REG_GFX10_4_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_NO_MSG, - nggControl->passthroughMode && !m_pipelineState->enableSwXfb()); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, NGG_WAVE_ID_EN, m_pipelineState->enableSwXfb()); - } - - if 
(m_pipelineState->hasShaderStage(ShaderStage::Vertex) || - m_pipelineState->hasShaderStage(ShaderStage::TessControl)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasTcs = m_pipelineState->hasShaderStage(ShaderStage::TessControl); - - buildLsHsRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasTcs ? ShaderStage::TessControl : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::TessControl); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.lsHsRegs, SPI_SHADER_PGM_CHKSUM_HS, CHECKSUM, checksum); - } - - if (m_gfxIp.major <= 11) - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, LS_EN, LS_STAGE_ON); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, HS_EN, HS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessControl); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, HS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Hs, waveSize); - } else { - invalidRegConfig(config.lsHsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::TessEval)) { - buildPrimShaderRegConfig(ShaderStage::TessEval, ShaderStage::Invalid, &config); - - if (m_gfxIp.major <= 11) { - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_DS); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_REAL); - } - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessEval); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - - unsigned checksum = setShaderHash(ShaderStage::TessEval); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.primShaderRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - } else { - 
invalidRegConfig(config.primShaderRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const auto &tcsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; - - if (tcsBuiltInUsage.primitiveId) - iaMultiVgtParam.bits.SWITCH_ON_EOI = true; - - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (NGG, VS-GS-FS). -void ConfigBuilder::buildPipelineNggVsGsFsRegConfig() { - assert(m_gfxIp.major >= 10); // Must be GFX10 or above - - assert(m_pipelineState->getNggControl()->enableNgg); - - PipelineNggVsGsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Geometry, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::Ngg); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_EN, true); - if (m_gfxIp.major <= 11) { - // NOTE: When GS is present, NGG pass-through mode is always turned off regardless of the pass-through flag of - // NGG control settings. In such case, the pass-through flag means whether there is culling (different from - // hardware pass-through). 
- SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_EN, false); - } - if (m_gfxIp.major >= 11) - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, NGG_WAVE_ID_EN, m_pipelineState->enableSwXfb()); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex) || m_pipelineState->hasShaderStage(ShaderStage::Geometry)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - - buildPrimShaderRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasGs ? ShaderStage::Geometry : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::Geometry); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.primShaderRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - - if (m_gfxIp.major <= 11) { - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_REAL); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_REAL); - } - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, GS_EN, GS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - } else { - invalidRegConfig(config.primShaderRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const unsigned primGroupSize = 128; - iaMultiVgtParam.bits.PRIMGROUP_SIZE = primGroupSize - 1; - - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - - appendConfig(config); -} - -// 
===================================================================================================================== -// Builds register configuration for graphics pipeline (NGG, VS-TS-GS-FS). -void ConfigBuilder::buildPipelineNggVsTsGsFsRegConfig() { - assert(m_gfxIp.major >= 10); // Must be GFX10 or above - - assert(m_pipelineState->getNggControl()->enableNgg); - - PipelineNggVsTsGsFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Vertex, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessControl, Util::Abi::HwShaderHs); - addApiHwShaderMapping(ShaderStage::TessEval, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Geometry, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::NggTess); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_EN, true); - if (m_gfxIp.major <= 11) { - // NOTE: When GS is present, NGG pass-through mode is always turned off regardless of the pass-through flag of - // NGG control settings. In such case, the pass-through flag means whether there is culling (different from - // hardware pass-through). - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, PRIMGEN_PASSTHRU_EN, false); - } - if (m_gfxIp.major >= 11) - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, NGG_WAVE_ID_EN, m_pipelineState->enableSwXfb()); - - if (m_pipelineState->hasShaderStage(ShaderStage::Vertex) || - m_pipelineState->hasShaderStage(ShaderStage::TessControl)) { - const bool hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); - const bool hasTcs = m_pipelineState->hasShaderStage(ShaderStage::TessControl); - - buildLsHsRegConfig(hasVs ? ShaderStage::Vertex : ShaderStage::Invalid, - hasTcs ? 
ShaderStage::TessControl : ShaderStage::Invalid, &config); - - unsigned checksum = setShaderHash(ShaderStage::Vertex); - checksum = checksum ^ setShaderHash(ShaderStage::TessControl); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.lsHsRegs, SPI_SHADER_PGM_CHKSUM_HS, CHECKSUM, checksum); - } - - if (m_gfxIp.major <= 11) - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, LS_EN, LS_STAGE_ON); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, HS_EN, HS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessControl); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, HS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Hs, waveSize); - } else { - invalidRegConfig(config.lsHsRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::TessEval) || - m_pipelineState->hasShaderStage(ShaderStage::Geometry)) { - const bool hasTes = m_pipelineState->hasShaderStage(ShaderStage::TessEval); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - - buildPrimShaderRegConfig(hasTes ? ShaderStage::TessEval : ShaderStage::Invalid, - hasGs ? 
ShaderStage::Geometry : ShaderStage::Invalid, - &config); - - unsigned checksum = setShaderHash(ShaderStage::TessEval); - checksum = checksum ^ setShaderHash(ShaderStage::Geometry); - - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) { - SET_REG_FIELD(&config.primShaderRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - } - - if (m_gfxIp.major <= 11) { - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_DS); - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, VS_EN, VS_STAGE_REAL); - } - SET_REG_FIELD(&config, VGT_SHADER_STAGES_EN, GS_EN, GS_STAGE_ON); - - const auto waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - SET_REG_GFX10_PLUS_FIELD(&config, VGT_SHADER_STAGES_EN, GS_W32_EN, (waveSize == 32)); - - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - } else { - invalidRegConfig(config.primShaderRegs); - } - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - } else { - invalidRegConfig(config.psRegs); - } - - // Set up IA_MULTI_VGT_PARAM - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - const auto &tcsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; - const auto &gsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->builtInUsage.gs; - - if (tcsBuiltInUsage.primitiveId || gsBuiltInUsage.primitiveIdIn) - iaMultiVgtParam.bits.SWITCH_ON_EOI = true; - - SET_REG(&config, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); - - // Set up VGT_TF_PARAM - setupVgtTfParam(&config.lsHsRegs); - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (Mesh-FS). 
-void ConfigBuilder::buildPipelineMeshFsConfig() { - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - - PipelineMeshFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Mesh, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::Mesh); - - // Must contain mesh shader - assert(m_pipelineState->hasShaderStage(ShaderStage::Mesh)); - buildMeshRegConfig(ShaderStage::Mesh, &config); - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - - unsigned checksum = setShaderHash(ShaderStage::Fragment); - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) - SET_REG_FIELD(&config.psRegs, SPI_SHADER_PGM_CHKSUM_PS, CHECKSUM, checksum); - } else { - invalidRegConfig(config.psRegs); - } - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for graphics pipeline (Task-Mesh-FS). 
-void ConfigBuilder::buildPipelineTaskMeshFsConfig() { - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - - PipelineTaskMeshFsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Task, Util::Abi::HwShaderCs); - addApiHwShaderMapping(ShaderStage::Mesh, Util::Abi::HwShaderGs); - addApiHwShaderMapping(ShaderStage::Fragment, Util::Abi::HwShaderPs); - - setPipelineType(Util::Abi::PipelineType::TaskMesh); - - // Must contain task shader - assert(m_pipelineState->hasShaderStage(ShaderStage::Task)); - buildCsRegConfig(ShaderStage::Task, &config.taskRegs); - - if (m_pipelineState->hasShaderStage(ShaderStage::Mesh)) - buildMeshRegConfig(ShaderStage::Mesh, &config); - else - invalidRegConfig(config.meshRegs); - - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) { - buildPsRegConfig(ShaderStage::Fragment, &config); - - unsigned checksum = setShaderHash(ShaderStage::Fragment); - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) - SET_REG_FIELD(&config.psRegs, SPI_SHADER_PGM_CHKSUM_PS, CHECKSUM, checksum); - } else { - invalidRegConfig(config.psRegs); - } - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for compute pipeline. -void ConfigBuilder::buildPipelineCsRegConfig() { - assert(m_pipelineState->hasShaderStage(ShaderStage::Compute)); - - CsRegConfig config(m_gfxIp); - - addApiHwShaderMapping(ShaderStage::Compute, Util::Abi::HwShaderCs); - - setPipelineType(Util::Abi::PipelineType::Cs); - - buildCsRegConfig(ShaderStage::Compute, &config); - - appendConfig(config); -} - -// ===================================================================================================================== -// Builds register configuration for hardware vertex shader. 
-// -// @param shaderStage : Current shader stage (from API side) -// @param [out] config : Register configuration for vertex-shader-specific pipeline -template void ConfigBuilder::buildVsRegConfig(ShaderStageEnum shaderStage, T *config) { - assert(shaderStage == ShaderStage::Vertex || shaderStage == ShaderStage::TessEval || - shaderStage == ShaderStage::CopyShader); - - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - const auto intfData = m_pipelineState->getShaderInterfaceData(shaderStage); - - const auto resUsage = m_pipelineState->getShaderResourceUsage(shaderStage); - const auto &builtInUsage = resUsage->builtInUsage; - - unsigned floatMode = setupFloatingPointMode(shaderStage); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, DX10_CLAMP, true); // Follow PAL setting - - const auto &xfbStrides = m_pipelineState->getXfbBufferStrides(); - const auto &streamXfbBuffers = m_pipelineState->getStreamXfbBuffers(); - const bool enableXfb = m_pipelineState->enableXfb(); - const bool enablePrimStats = m_pipelineState->enablePrimStats(); - if (shaderStage == ShaderStage::CopyShader) { - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, USER_SGPR, lgc::CopyShaderUserSgprCount); - setNumAvailSgprs(Util::Abi::HardwareStage::Vs, m_pipelineState->getTargetInfo().getGpuProperty().maxSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Vs, m_pipelineState->getTargetInfo().getGpuProperty().maxVgprsAvailable); - - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_0_EN, enablePrimStats || streamXfbBuffers[0] > 0); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_1_EN, enablePrimStats || streamXfbBuffers[1] > 0); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_2_EN, enablePrimStats || streamXfbBuffers[2] > 0); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_3_EN, enablePrimStats || streamXfbBuffers[3] > 0); - 
SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, RAST_STREAM, m_pipelineState->getRasterizerState().rasterStream); - } else { - const auto &shaderOptions = m_pipelineState->getShaderOptions(shaderStage); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, DEBUG_MODE, shaderOptions.debugMode); - - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, TRAP_PRESENT, shaderOptions.trapPresent); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, USER_SGPR, intfData->userDataCount); - const bool userSgprMsb = (intfData->userDataCount > 31); - - if (m_gfxIp.major == 10) { - SET_REG_GFX10_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, USER_SGPR_MSB, userSgprMsb); - } else { - SET_REG_GFX9_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, USER_SGPR_MSB, userSgprMsb); - } - - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_0_EN, enablePrimStats || enableXfb); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_1_EN, enablePrimStats); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_2_EN, enablePrimStats); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_CONFIG, STREAMOUT_3_EN, enablePrimStats); - - setNumAvailSgprs(Util::Abi::HardwareStage::Vs, resUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Vs, resUsage->numVgprsAvailable); - } - - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, SO_EN, enableXfb); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, SO_BASE0_EN, (xfbStrides[0] > 0)); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, SO_BASE1_EN, (xfbStrides[1] > 0)); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, SO_BASE2_EN, (xfbStrides[2] > 0)); - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, SO_BASE3_EN, (xfbStrides[3] > 0)); - - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_VTX_STRIDE_0, STRIDE, xfbStrides[0] / sizeof(unsigned)); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_VTX_STRIDE_1, STRIDE, xfbStrides[1] / sizeof(unsigned)); - 
SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_VTX_STRIDE_2, STRIDE, xfbStrides[2] / sizeof(unsigned)); - SET_REG_FIELD(&config->vsRegs, VGT_STRMOUT_VTX_STRIDE_3, STRIDE, xfbStrides[3] / sizeof(unsigned)); - - unsigned streamBufferConfig = 0; - for (auto i = 0; i < MaxGsStreams; ++i) - streamBufferConfig |= (streamXfbBuffers[i] << (i * 4)); - SET_REG(&config->vsRegs, VGT_STRMOUT_BUFFER_CONFIG, streamBufferConfig); - - if (m_gfxIp.major == 10) { - SET_REG_GFX10_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, MEM_ORDERED, true); - } - - if (shaderStage == ShaderStage::Vertex) { - if (builtInUsage.vs.instanceIndex) { - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 3); // 3: Enable instance ID - } else if (builtInUsage.vs.primitiveId) { - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 2); - } - } else if (shaderStage == ShaderStage::TessEval) { - if (builtInUsage.tes.primitiveId) { - // NOTE: when primitive ID is used, set vgtCompCnt to 3 directly because primitive ID is the last VGPR. - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 3); // 3: Enable primitive ID - } else { - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 2); - } - - SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN, true); - } - - setupPaSpecificRegisters(&config->vsRegs); -} - -// ===================================================================================================================== -// Builds register configuration for hardware local-hull merged shader. 
-// -// @param shaderStage1 : Current first shader stage (from API side) -// @param shaderStage2 : Current second shader stage (from API side) -// @param [out] config : Register configuration for local-hull-shader-specific pipeline -template -void ConfigBuilder::buildLsHsRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config) { - assert(shaderStage1 == ShaderStage::Vertex || shaderStage1 == ShaderStage::Invalid); - assert(shaderStage2 == ShaderStage::TessControl || shaderStage2 == ShaderStage::Invalid); - - const auto tcsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl); - const auto vsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); - const auto &vsBuiltInUsage = vsResUsage->builtInUsage.vs; - - unsigned floatMode = setupFloatingPointMode(shaderStage2 != ShaderStage::Invalid ? shaderStage2 : shaderStage1); - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, DX10_CLAMP, true); // Follow PAL setting - - unsigned lsVgprCompCnt = 0; - if (m_gfxIp.major <= 11) { - if (vsBuiltInUsage.instanceIndex) - lsVgprCompCnt = 3; // Enable all LS VGPRs (LS VGPR2 - VGPR5) - else - lsVgprCompCnt = 1; // Must enable relative vertex ID (LS VGPR2 and VGPR3) - } else { - llvm_unreachable("Not implemented!"); - } - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, LS_VGPR_COMP_CNT, lsVgprCompCnt); - - const auto &vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Vertex); - const auto &tcsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::TessControl); - unsigned userDataCount = std::max(vsIntfData->userDataCount, tcsIntfData->userDataCount); - - const auto &tcsShaderOptions = m_pipelineState->getShaderOptions(ShaderStage::TessControl); - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, DEBUG_MODE, tcsShaderOptions.debugMode); - - const bool userSgprMsb = (userDataCount > 31); - 
if (m_gfxIp.major >= 10) { - bool wgpMode = (m_pipelineState->getShaderWgpMode(ShaderStage::Vertex) || - m_pipelineState->getShaderWgpMode(ShaderStage::TessControl)); - - SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, MEM_ORDERED, true); - SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC1_HS, WGP_MODE, wgpMode); - SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, USER_SGPR_MSB, userSgprMsb); - - // The shared scratch offset is reused by HW to provide HS wave ID in group - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, SCRATCH_EN, m_pipelineState->canOptimizeTessFactor()); - } else { - SET_REG_GFX9_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, USER_SGPR_MSB, userSgprMsb); - } - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, TRAP_PRESENT, tcsShaderOptions.trapPresent); - SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, USER_SGPR, userDataCount); - - const auto &calcFactor = tcsResUsage->inOutUsage.tcs.calcFactor; - - const unsigned ldsSizeDwordGranularityShift = - m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - const unsigned ldsSizeDwordGranularity = 1u << ldsSizeDwordGranularityShift; - - unsigned ldsSizeInDwords = calcFactor.tessOnChipLdsSize; - ldsSizeInDwords += calcFactor.rayQueryLdsStackSize; - ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); - - const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; - SET_REG_GFX10_PLUS_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, LDS_SIZE, ldsSize); - - if (m_gfxIp.major >= 11) { - // Pixel wait sync+ - const bool useImageOp = vsResUsage->useImageOp || tcsResUsage->useImageOp; - SET_REG_GFX11_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC4_HS, IMAGE_OP, useImageOp); - } - - setLdsSizeByteSize(Util::Abi::HardwareStage::Hs, ldsSizeInDwords * 4); - - // Minimum and maximum tessellation factors supported by the hardware. 
- constexpr float minTessFactor = 1.0f; - constexpr float maxTessFactor = 64.0f; - SET_REG(&config->lsHsRegs, VGT_HOS_MIN_TESS_LEVEL, bit_cast(minTessFactor)); - SET_REG(&config->lsHsRegs, VGT_HOS_MAX_TESS_LEVEL, bit_cast(maxTessFactor)); - - // Set VGT_LS_HS_CONFIG - SET_REG_FIELD(&config->lsHsRegs, VGT_LS_HS_CONFIG, NUM_PATCHES, calcFactor.patchCountPerThreadGroup); - SET_REG_FIELD(&config->lsHsRegs, VGT_LS_HS_CONFIG, HS_NUM_INPUT_CP, m_pipelineState->getNumPatchControlPoints()); - - auto hsNumOutputCp = m_pipelineState->getShaderModes()->getTessellationMode().outputVertices; - SET_REG_FIELD(&config->lsHsRegs, VGT_LS_HS_CONFIG, HS_NUM_OUTPUT_CP, hsNumOutputCp); - - setNumAvailSgprs(Util::Abi::HardwareStage::Hs, tcsResUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Hs, tcsResUsage->numVgprsAvailable); - - // Set up VGT_TF_PARAM - setupVgtTfParam(&config->lsHsRegs); -} - -// ===================================================================================================================== -// Builds register configuration for hardware export-geometry merged shader. 
-// -// @param shaderStage1 : Current first shader stage (from API side) -// @param shaderStage2 : Current second shader stage (from API side) -// @param [out] config : Register configuration for export-geometry-shader-specific pipeline -template -void ConfigBuilder::buildEsGsRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config) { - assert(shaderStage1 == ShaderStage::Vertex || shaderStage1 == ShaderStage::TessEval || - shaderStage1 == ShaderStage::Invalid); - assert(shaderStage2 == ShaderStage::Geometry || shaderStage2 == ShaderStage::Invalid); - - assert(m_gfxIp.major <= 10); // Must be GFX10 or below - - const bool hasTs = m_pipelineState->hasShaderStage(ShaderStage::TessControl) || - m_pipelineState->hasShaderStage(ShaderStage::TessEval); - - const auto vsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); - const auto &vsBuiltInUsage = vsResUsage->builtInUsage.vs; - - const auto tesResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); - const auto &tesBuiltInUsage = tesResUsage->builtInUsage.tes; - - const auto gsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry); - const auto &gsBuiltInUsage = gsResUsage->builtInUsage.gs; - const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); - const auto &gsInOutUsage = gsResUsage->inOutUsage; - const auto &calcFactor = gsInOutUsage.gs.calcFactor; - - unsigned gsVgprCompCnt = 0; - if (calcFactor.inputVertices > 4 || gsBuiltInUsage.invocationId) - gsVgprCompCnt = 3; // Enable vtx4/vtx5 offset (GS VGPR3) or GS instance ID (GS VGPR4) - else if (gsBuiltInUsage.primitiveIdIn) - gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) - else if (calcFactor.inputVertices > 2) - gsVgprCompCnt = 1; // Enable vtx2/vtx3 offset (GS VGPR1) - - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, GS_VGPR_COMP_CNT, gsVgprCompCnt); - - unsigned floatMode = setupFloatingPointMode(shaderStage2 != ShaderStage::Invalid 
? shaderStage2 : shaderStage1); - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, DX10_CLAMP, true); // Follow PAL setting - - const auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Vertex); - const auto tesIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::TessEval); - const auto gsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Geometry); - unsigned userDataCount = - std::max((hasTs ? tesIntfData->userDataCount : vsIntfData->userDataCount), gsIntfData->userDataCount); - - const auto &gsShaderOptions = m_pipelineState->getShaderOptions(ShaderStage::Geometry); - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, DEBUG_MODE, gsShaderOptions.debugMode); - - const bool userSgprMsb = (userDataCount > 31); - if (m_gfxIp.major == 10) { - bool wgpMode = m_pipelineState->getShaderWgpMode(hasTs ? ShaderStage::TessEval : ShaderStage::Vertex) || - m_pipelineState->getShaderWgpMode(ShaderStage::Geometry); - - SET_REG_GFX10_PLUS_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, MEM_ORDERED, true); - SET_REG_GFX10_PLUS_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC1_GS, WGP_MODE, wgpMode); - SET_REG_GFX10_PLUS_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR_MSB, userSgprMsb); - } else { - SET_REG_GFX9_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR_MSB, userSgprMsb); - } - - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, TRAP_PRESENT, gsShaderOptions.trapPresent); - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR, userDataCount); - - unsigned esVgprCompCnt = 0; - if (hasTs) { - if (tesBuiltInUsage.primitiveId) - esVgprCompCnt = 3; // Enable patch ID (ES VGPR8) - else - esVgprCompCnt = 2; // Must enable relative patch ID (ES VGPR7) - } else { - if (vsBuiltInUsage.instanceIndex) - esVgprCompCnt = 3; // Enable instance ID (ES VGPR8) - } - SET_REG_FIELD(&config->esGsRegs, 
SPI_SHADER_PGM_RSRC2_GS, ES_VGPR_COMP_CNT, esVgprCompCnt); - - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, OC_LDS_EN, hasTs); - - const unsigned ldsSizeDwordGranularityShift = - m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - const unsigned ldsSizeDwordGranularity = 1u << ldsSizeDwordGranularityShift; - - unsigned ldsSizeInDwords = calcFactor.gsOnChipLdsSize; - ldsSizeInDwords += calcFactor.rayQueryLdsStackSize; - ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); - - const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; - SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); - - setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - - unsigned maxVertOut = std::max(1u, static_cast(geometryMode.outputVertices)); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut); - - // TODO: Currently only support offchip GS - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, MODE, GS_SCENARIO_G); - - if (m_pipelineState->isGsOnChip()) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_ON); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, false); - } else { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, true); - } - - if (geometryMode.outputVertices <= 128) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, CUT_MODE, GS_CUT_128__HASHWVS); - } else if (geometryMode.outputVertices <= 256) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, CUT_MODE, GS_CUT_256__HASHWVS); - } else if (geometryMode.outputVertices <= 512) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, CUT_MODE, GS_CUT_512__HASHWVS); - } else { - SET_REG_FIELD(&config->esGsRegs, 
VGT_GS_MODE, CUT_MODE, GS_CUT_1024__HASHWVS); - } - - SET_REG_FIELD(&config->esGsRegs, VGT_GS_ONCHIP_CNTL, ES_VERTS_PER_SUBGRP, calcFactor.esVertsPerSubgroup); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_ONCHIP_CNTL, GS_PRIMS_PER_SUBGRP, calcFactor.gsPrimsPerSubgroup); - - // NOTE: The value of field "GS_INST_PRIMS_IN_SUBGRP" should be strictly equal to the product of - // VGT_GS_ONCHIP_CNTL.GS_PRIMS_PER_SUBGRP * VGT_GS_INSTANCE_CNT.CNT. - const unsigned gsInstPrimsInSubgrp = - geometryMode.invocations > 1 ? (calcFactor.gsPrimsPerSubgroup * geometryMode.invocations) : 0; - SET_REG_FIELD(&config->esGsRegs, VGT_GS_ONCHIP_CNTL, GS_INST_PRIMS_IN_SUBGRP, gsInstPrimsInSubgrp); - - unsigned gsVertItemSize0 = sizeof(unsigned) * gsInOutUsage.gs.outLocCount[0]; - SET_REG_FIELD(&config->esGsRegs, VGT_GS_VERT_ITEMSIZE, ITEMSIZE, gsVertItemSize0); - - unsigned gsVertItemSize1 = sizeof(unsigned) * gsInOutUsage.gs.outLocCount[1]; - SET_REG_FIELD(&config->esGsRegs, VGT_GS_VERT_ITEMSIZE_1, ITEMSIZE, gsVertItemSize1); - - unsigned gsVertItemSize2 = sizeof(unsigned) * gsInOutUsage.gs.outLocCount[2]; - SET_REG_FIELD(&config->esGsRegs, VGT_GS_VERT_ITEMSIZE_2, ITEMSIZE, gsVertItemSize2); - - unsigned gsVertItemSize3 = sizeof(unsigned) * gsInOutUsage.gs.outLocCount[3]; - SET_REG_FIELD(&config->esGsRegs, VGT_GS_VERT_ITEMSIZE_3, ITEMSIZE, gsVertItemSize3); - - unsigned gsVsRingOffset = gsVertItemSize0 * maxVertOut; - SET_REG_FIELD(&config->esGsRegs, VGT_GSVS_RING_OFFSET_1, OFFSET, gsVsRingOffset); - - gsVsRingOffset += gsVertItemSize1 * maxVertOut; - SET_REG_FIELD(&config->esGsRegs, VGT_GSVS_RING_OFFSET_2, OFFSET, gsVsRingOffset); - - gsVsRingOffset += gsVertItemSize2 * maxVertOut; - SET_REG_FIELD(&config->esGsRegs, VGT_GSVS_RING_OFFSET_3, OFFSET, gsVsRingOffset); - - if (geometryMode.invocations > 1 || gsBuiltInUsage.invocationId) { - SET_REG_FIELD(&config->esGsRegs, VGT_GS_INSTANCE_CNT, ENABLE, true); - SET_REG_FIELD(&config->esGsRegs, VGT_GS_INSTANCE_CNT, CNT, geometryMode.invocations); - 
} - SET_REG_FIELD(&config->esGsRegs, VGT_GS_PER_VS, GS_PER_VS, GsThreadsPerVsThread); - - VGT_GS_OUTPRIM_TYPE gsOutputPrimitiveType = TRISTRIP; - if (geometryMode.outputPrimitive == OutputPrimitives::Points) - gsOutputPrimitiveType = POINTLIST; - else if (geometryMode.outputPrimitive == OutputPrimitives::LineStrip) - gsOutputPrimitiveType = LINESTRIP; - - SET_REG_FIELD(&config->esGsRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE, gsOutputPrimitiveType); - - // Set multi-stream output primitive type - if (gsVertItemSize1 > 0 || gsVertItemSize2 > 0 || gsVertItemSize3 > 0) { - const static auto GsOutPrimInvalid = 3u; - SET_REG_GFX9_10_FIELD(&config->esGsRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE_1, - gsVertItemSize1 > 0 ? gsOutputPrimitiveType : GsOutPrimInvalid); - - SET_REG_GFX9_10_FIELD(&config->esGsRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE_2, - gsVertItemSize2 > 0 ? gsOutputPrimitiveType : GsOutPrimInvalid); - - SET_REG_GFX9_10_FIELD(&config->esGsRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE_3, - gsVertItemSize3 > 0 ? gsOutputPrimitiveType : GsOutPrimInvalid); - } - - SET_REG_FIELD(&config->esGsRegs, VGT_GSVS_RING_ITEMSIZE, ITEMSIZE, calcFactor.gsVsRingItemSize); - SET_REG_FIELD(&config->esGsRegs, VGT_ESGS_RING_ITEMSIZE, ITEMSIZE, calcFactor.esGsRingItemSize); - - const unsigned maxPrimsPerSubgroup = std::min(gsInstPrimsInSubgrp * maxVertOut, MaxGsThreadsPerSubgroup); - - SET_REG_FIELD(&config->esGsRegs, GE_MAX_OUTPUT_PER_SUBGROUP, MAX_VERTS_PER_SUBGROUP, maxPrimsPerSubgroup); - - setNumAvailSgprs(Util::Abi::HardwareStage::Gs, gsResUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Gs, gsResUsage->numVgprsAvailable); -} - -// ===================================================================================================================== -// Builds register configuration for hardware primitive shader. 
-// -// @param shaderStage1 : Current first shader stage (from API side) -// @param shaderStage2 : Current second shader stage (from API side) -// @param [out] config : Register configuration for primitive-shader-specific pipeline -template -void ConfigBuilder::buildPrimShaderRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config) { - assert(shaderStage1 == ShaderStage::Vertex || shaderStage1 == ShaderStage::TessEval || - shaderStage1 == ShaderStage::Invalid); - assert(shaderStage2 == ShaderStage::Geometry || shaderStage2 == ShaderStage::Invalid); - - assert(m_gfxIp.major >= 10); // Must be GFX10 or above - - const auto nggControl = m_pipelineState->getNggControl(); - assert(nggControl->enableNgg); - - const bool hasTs = m_pipelineState->hasShaderStage(ShaderStage::TessControl) || - m_pipelineState->hasShaderStage(ShaderStage::TessEval); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - - const auto vsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); - const auto &vsBuiltInUsage = vsResUsage->builtInUsage.vs; - - const auto tesResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); - const auto &tesBuiltInUsage = tesResUsage->builtInUsage.tes; - - const auto gsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry); - const auto &gsBuiltInUsage = gsResUsage->builtInUsage.gs; - const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); - const auto &gsInOutUsage = gsResUsage->inOutUsage; - const auto &calcFactor = gsInOutUsage.gs.calcFactor; - - // - // Build ES-GS specific configuration - // - unsigned gsVgprCompCnt = 0; - if (m_gfxIp.major <= 11) { - if (hasGs) { - if (calcFactor.inputVertices > 4 || gsBuiltInUsage.invocationId) - gsVgprCompCnt = 3; // Enable vtx4/vtx5 offset (GS VGPR3) or GS instance ID (GS VGPR4) - else if (gsBuiltInUsage.primitiveIdIn) - gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) - else if 
(calcFactor.inputVertices > 2) - gsVgprCompCnt = 1; // Enable vtx2/vtx3 offset (GS VGPR1) - } else { - // NOTE: When GS is absent, only those VGPRs are required: vtx0/vtx1 offset, vtx2/vtx3 offset, - // primitive ID (only for VS). - gsVgprCompCnt = 1; - if (!hasTs && vsBuiltInUsage.primitiveId) - gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) - } - } else { - llvm_unreachable("Not implemented!"); - } - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, GS_VGPR_COMP_CNT, gsVgprCompCnt); - - unsigned floatMode = setupFloatingPointMode(shaderStage2 != ShaderStage::Invalid ? shaderStage2 : shaderStage1); - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, DX10_CLAMP, true); // Follow PAL setting - - const auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Vertex); - const auto tesIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::TessEval); - const auto gsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStage::Geometry); - unsigned userDataCount = - std::max((hasTs ? tesIntfData->userDataCount : vsIntfData->userDataCount), gsIntfData->userDataCount); - - const auto &gsShaderOptions = m_pipelineState->getShaderOptions(ShaderStage::Geometry); - bool wgpMode = m_pipelineState->getShaderWgpMode(hasTs ? 
ShaderStage::TessEval : ShaderStage::Vertex); - if (hasGs) - wgpMode = (wgpMode || m_pipelineState->getShaderWgpMode(ShaderStage::Geometry)); - - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, DEBUG_MODE, gsShaderOptions.debugMode); - SET_REG_GFX10_PLUS_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, MEM_ORDERED, true); - SET_REG_GFX10_PLUS_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC1_GS, WGP_MODE, wgpMode); - - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, TRAP_PRESENT, gsShaderOptions.trapPresent); - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR, userDataCount); - - const bool userSgprMsb = (userDataCount > 31); - - SET_REG_GFX10_PLUS_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR_MSB, userSgprMsb); - - unsigned esVgprCompCnt = 0; - if (m_gfxIp.major <= 11) { - if (hasTs) { - if (tesBuiltInUsage.primitiveId) - esVgprCompCnt = 3; // Enable patch ID (ES VGPR8) - else - esVgprCompCnt = 2; // Must enable relative patch ID (ES VGPR7) - } else { - if (vsBuiltInUsage.instanceIndex) - esVgprCompCnt = 3; // Enable instance ID (ES VGPR8) - } - } else { - - llvm_unreachable("Not implemented!"); - } - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, ES_VGPR_COMP_CNT, esVgprCompCnt); - - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, OC_LDS_EN, hasTs); - - const unsigned ldsSizeDwordGranularityShift = - m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - const unsigned ldsSizeDwordGranularity = 1u << ldsSizeDwordGranularityShift; - - unsigned ldsSizeInDwords = calcFactor.gsOnChipLdsSize; - ldsSizeInDwords += calcFactor.rayQueryLdsStackSize; - ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); - - const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); - 
setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - - if (m_gfxIp.major >= 11) { - // Pixel wait sync+ - bool useImageOp = hasGs ? gsResUsage->useImageOp : false; - if (hasTs) - useImageOp |= tesResUsage->useImageOp; - else - useImageOp |= vsResUsage->useImageOp; - SET_REG_GFX11_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC4_GS, IMAGE_OP, useImageOp); - } - - unsigned maxVertOut = std::max(1u, static_cast(geometryMode.outputVertices)); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut); - - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_MODE, MODE, GS_SCENARIO_G); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, true); - - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_ONCHIP_CNTL, ES_VERTS_PER_SUBGRP, calcFactor.esVertsPerSubgroup); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_ONCHIP_CNTL, GS_PRIMS_PER_SUBGRP, calcFactor.gsPrimsPerSubgroup); - setNggSubgroupSize(std::max(calcFactor.esVertsPerSubgroup, calcFactor.gsPrimsPerSubgroup)); - - const unsigned gsInstPrimsInSubgrp = geometryMode.invocations > 1 - ? 
(calcFactor.gsPrimsPerSubgroup * geometryMode.invocations) - : calcFactor.gsPrimsPerSubgroup; - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_ONCHIP_CNTL, GS_INST_PRIMS_IN_SUBGRP, gsInstPrimsInSubgrp); - - unsigned gsVertItemSize = 4 * gsInOutUsage.outputMapLocCount; - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_VERT_ITEMSIZE, ITEMSIZE, gsVertItemSize); - - if (geometryMode.invocations > 1 || gsBuiltInUsage.invocationId) { - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_INSTANCE_CNT, ENABLE, true); - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_INSTANCE_CNT, CNT, geometryMode.invocations); - if (m_gfxIp >= GfxIpVersion{10, 1}) { - SET_REG_GFX10_PLUS_FIELD(&config->primShaderRegs, VGT_GS_INSTANCE_CNT, EN_MAX_VERT_OUT_PER_GS_INSTANCE, - calcFactor.enableMaxVertOut); - } - } - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_PER_VS, GS_PER_VS, GsThreadsPerVsThread); - - VGT_GS_OUTPRIM_TYPE gsOutputPrimitiveType = POINTLIST; - if (hasGs) { - // GS present - if (gsInOutUsage.outputMapLocCount == 0) - gsOutputPrimitiveType = POINTLIST; - else if (geometryMode.outputPrimitive == OutputPrimitives::Points) - gsOutputPrimitiveType = POINTLIST; - else if (geometryMode.outputPrimitive == OutputPrimitives::LineStrip) - gsOutputPrimitiveType = LINESTRIP; - else if (geometryMode.outputPrimitive == OutputPrimitives::TriangleStrip) - gsOutputPrimitiveType = TRISTRIP; - else - llvm_unreachable("Should never be called!"); - } else if (hasTs) { - // With tessellation - const auto &tessMode = m_pipelineState->getShaderModes()->getTessellationMode(); - if (tessMode.pointMode) - gsOutputPrimitiveType = POINTLIST; - else if (tessMode.primitiveMode == PrimitiveMode::Isolines) - gsOutputPrimitiveType = LINESTRIP; - else if (tessMode.primitiveMode == PrimitiveMode::Triangles || tessMode.primitiveMode == PrimitiveMode::Quads) - gsOutputPrimitiveType = TRISTRIP; - else - llvm_unreachable("Should never be called!"); - } else { - // Without tessellation - const auto primType = 
m_pipelineState->getInputAssemblyState().primitiveType; - switch (primType) { - case PrimitiveType::Point: - gsOutputPrimitiveType = POINTLIST; - break; - case PrimitiveType::LineList: - case PrimitiveType::LineStrip: - gsOutputPrimitiveType = LINESTRIP; - break; - case PrimitiveType::TriangleList: - case PrimitiveType::TriangleStrip: - case PrimitiveType::TriangleFan: - case PrimitiveType::TriangleListAdjacency: - case PrimitiveType::TriangleStripAdjacency: - gsOutputPrimitiveType = TRISTRIP; - break; - default: - llvm_unreachable("Should never be called!"); - break; - } - } - - // TODO: Multiple output streams are not supported. - SET_REG_FIELD(&config->primShaderRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE, gsOutputPrimitiveType); - if (m_gfxIp.major <= 11) { - SET_REG_FIELD(&config->primShaderRegs, VGT_GSVS_RING_ITEMSIZE, ITEMSIZE, calcFactor.gsVsRingItemSize); - // NOTE: When GS is absent, always set ES-GS ring item size to 1. Thus, we can easily get vertex ID in subgroup - // without any additional calculations. - SET_REG_FIELD(&config->primShaderRegs, VGT_ESGS_RING_ITEMSIZE, ITEMSIZE, hasGs ? 
calcFactor.esGsRingItemSize : 1); - } - - const unsigned maxVertsPerSubgroup = std::min(gsInstPrimsInSubgrp * maxVertOut, NggMaxThreadsPerSubgroup); - SET_REG_FIELD(&config->primShaderRegs, GE_MAX_OUTPUT_PER_SUBGROUP, MAX_VERTS_PER_SUBGROUP, maxVertsPerSubgroup); - - if (hasGs) { - setNumAvailSgprs(Util::Abi::HardwareStage::Gs, gsResUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Gs, gsResUsage->numVgprsAvailable); - } else { - if (hasTs) { - setNumAvailSgprs(Util::Abi::HardwareStage::Gs, tesResUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Gs, tesResUsage->numVgprsAvailable); - } else { - setNumAvailSgprs(Util::Abi::HardwareStage::Gs, vsResUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Gs, vsResUsage->numVgprsAvailable); - } - } - - // - // Build VS specific configuration - // - setupPaSpecificRegisters(&config->primShaderRegs); - - // - // Build NGG configuration - // - assert(calcFactor.primAmpFactor >= 1); - SET_REG_FIELD(&config->primShaderRegs, GE_NGG_SUBGRP_CNTL, PRIM_AMP_FACTOR, calcFactor.primAmpFactor); - SET_REG_FIELD(&config->primShaderRegs, GE_NGG_SUBGRP_CNTL, THDS_PER_SUBGRP, NggMaxThreadsPerSubgroup); - - // TODO: Support PIPELINE_PRIM_ID. - SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_IDX_FORMAT, IDX0_EXPORT_FORMAT, SPI_SHADER_1COMP); - - if (nggControl->passthroughMode) { - INVALIDATE_REG(&config->primShaderRegs, SPI_SHADER_PGM_LO_GS); - } else { - // NOTE: For NGG culling mode, the primitive shader table that contains culling data might be accessed by - // shader. PAL expects 64-bit address of that table and will program it into SPI_SHADER_PGM_LO_GS and - // SPI_SHADER_PGM_HI_GS if we do not provide one. By setting SPI_SHADER_PGM_LO_GS to NggCullingData, we tell - // PAL that we will not provide it and it is fine to use SPI_SHADER_PGM_LO_GS and SPI_SHADER_PGM_HI_GS as - // the address of that table. 
- SET_REG(&config->primShaderRegs, SPI_SHADER_PGM_LO_GS, static_cast(UserDataMapping::NggCullingData)); - } - - // - // Build SW stream-out configuration (GFX11+) - // - if (m_pipelineState->enableSwXfb()) { - const auto &xfbStrides = m_pipelineState->getXfbBufferStrides(); - std::array xfbStridesInDwords; - for (unsigned i = 0; i < xfbStridesInDwords.size(); ++i) { - // Must be multiple of dword (PAL doesn't support 16-bit transform feedback outputs) - assert(xfbStrides[i] % sizeof(unsigned) == 0); - xfbStridesInDwords[i] = xfbStrides[i] / sizeof(unsigned); - } - setStreamOutVertexStrides(xfbStridesInDwords); // Set SW stream-out vertex strides - } -} - -// ===================================================================================================================== -// Builds register configuration for hardware pixel shader. -// -// @param shaderStage : Current shader stage (from API side) -// @param [out] config : Register configuration for pixel-shader-specific pipeline -template void ConfigBuilder::buildPsRegConfig(ShaderStageEnum shaderStage, T *config) { - assert(shaderStage == ShaderStage::Fragment); - - const auto intfData = m_pipelineState->getShaderInterfaceData(shaderStage); - const auto &options = m_pipelineState->getOptions(); - const auto &shaderOptions = m_pipelineState->getShaderOptions(shaderStage); - const auto resUsage = m_pipelineState->getShaderResourceUsage(shaderStage); - const auto &builtInUsage = resUsage->builtInUsage.fs; - const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - - unsigned floatMode = setupFloatingPointMode(shaderStage); - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC1_PS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC1_PS, DX10_CLAMP, true); // Follow PAL setting - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC1_PS, DEBUG_MODE, shaderOptions.debugMode); - - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, TRAP_PRESENT, 
shaderOptions.trapPresent); - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, USER_SGPR, intfData->userDataCount); - - const bool userSgprMsb = (intfData->userDataCount > 31); - - if (m_gfxIp.major >= 10) { - SET_REG_GFX10_PLUS_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC1_PS, MEM_ORDERED, true); - SET_REG_MOST_FIELD(&config->psRegs, PA_STEREO_CNTL, STEREO_MODE, STATE_STEREO_X); - SET_REG_GFX10_PLUS_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, USER_SGPR_MSB, userSgprMsb); - } else { - SET_REG_GFX9_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, USER_SGPR_MSB, userSgprMsb); - } - - if (intfData->entryArgIdxs.fs.provokingVtxInfo != 0) { - assert(m_gfxIp >= GfxIpVersion({10, 3})); - SET_REG_GFX10_3_PLUS_EXCLUSIVE_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC1_PS, LOAD_PROVOKING_VTX, true); - } - - if (m_gfxIp.major >= 11) { - // Pixel wait sync+ - SET_REG_GFX11_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC4_PS, IMAGE_OP, resUsage->useImageOp); - } - - const bool useFloatLocationAtIteratedSampleNumber = - options.fragCoordUsesInterpLoc ? 
builtInUsage.fragCoordIsSample : builtInUsage.runAtSampleRate; - - SET_REG_FIELD(&config->psRegs, SPI_BARYC_CNTL, FRONT_FACE_ALL_BITS, true); - if (fragmentMode.pixelCenterInteger) { - // TRUE - Force floating point position to upper left corner of pixel (X.0, Y.0) - SET_REG_FIELD(&config->psRegs, SPI_BARYC_CNTL, POS_FLOAT_ULC, true); - } else if (useFloatLocationAtIteratedSampleNumber) { - // 2 - Calculate per-pixel floating point position at iterated sample number - SET_REG_FIELD(&config->psRegs, SPI_BARYC_CNTL, POS_FLOAT_LOCATION, 2); - } else { - // 0 - Calculate per-pixel floating point position at pixel center - SET_REG_FIELD(&config->psRegs, SPI_BARYC_CNTL, POS_FLOAT_LOCATION, 0); - } - - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, WALK_ALIGN8_PRIM_FITS_ST, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, WALK_FENCE_ENABLE, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, TILE_WALK_ORDER_ENABLE, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, PS_ITER_SAMPLE, builtInUsage.runAtSampleRate); - - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, SUPERTILE_WALK_ORDER_ENABLE, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, FORCE_EOV_CNTDWN_ENABLE, true); - SET_REG_FIELD(&config->psRegs, PA_SC_MODE_CNTL_1, FORCE_EOV_REZ_ENABLE, true); - - ZOrder zOrder = LATE_Z; - bool execOnHeirFail = false; - if (shaderOptions.forceLateZ) - zOrder = LATE_Z; - else if (fragmentMode.earlyFragmentTests) - zOrder = EARLY_Z_THEN_LATE_Z; - else if (resUsage->resourceWrite) { - zOrder = LATE_Z; - execOnHeirFail = true; - } else if (shaderOptions.allowReZ) - zOrder = EARLY_Z_THEN_RE_Z; - else - zOrder = EARLY_Z_THEN_LATE_Z; - - ConservativeZExport conservativeZExport = EXPORT_ANY_Z; - if (fragmentMode.conservativeDepth == ConservativeDepth::LessEqual) - conservativeZExport = EXPORT_LESS_THAN_Z; - else if (fragmentMode.conservativeDepth == 
ConservativeDepth::GreaterEqual) - conservativeZExport = EXPORT_GREATER_THAN_Z; - - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, Z_ORDER, zOrder); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, KILL_ENABLE, builtInUsage.discard); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, Z_EXPORT_ENABLE, builtInUsage.fragDepth); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, STENCIL_TEST_VAL_EXPORT_ENABLE, builtInUsage.fragStencilRef); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, MASK_EXPORT_ENABLE, builtInUsage.sampleMask); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, ALPHA_TO_MASK_DISABLE, 1); // Set during pipeline finalization. - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, DEPTH_BEFORE_SHADER, fragmentMode.earlyFragmentTests); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, EXEC_ON_NOOP, - (fragmentMode.earlyFragmentTests && resUsage->resourceWrite)); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, EXEC_ON_HIER_FAIL, execOnHeirFail); - SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, CONSERVATIVE_Z_EXPORT, conservativeZExport); - - if (m_gfxIp.major >= 10) { - SET_REG_GFX10_PLUS_FIELD(&config->psRegs, DB_SHADER_CONTROL, PRE_SHADER_DEPTH_COVERAGE_ENABLE, - fragmentMode.postDepthCoverage); - } - - const auto waveSize = m_pipelineState->getShaderWaveSize(shaderStage); - SET_REG_GFX10_PLUS_FIELD(&config->psRegs, SPI_PS_IN_CONTROL, PS_W32_EN, (waveSize == 32)); - - if (m_gfxIp.major >= 10) - setWaveFrontSize(Util::Abi::HardwareStage::Ps, waveSize); - - unsigned pointCoordLoc = InvalidValue; - - auto builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInPointCoord); - if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) { - // Get generic input corresponding to gl_PointCoord (to set the field PT_SPRITE_TEX) - pointCoordLoc = builtInInputLocMapIt->second; - } - - // NOTE: PAL expects at least one mmSPI_PS_INPUT_CNTL_0 register set, so we always patch it at least one if none - // were 
identified in the shader. - const std::vector dummyInterpInfo{{0, false, false, false, false, false, false}}; - const auto &fsInterpInfo = resUsage->inOutUsage.fs.interpInfo; - const auto *interpInfo = fsInterpInfo.size() == 0 ? &dummyInterpInfo : &fsInterpInfo; - - unsigned numPrimInterp = 0; - for (unsigned i = 0; i < interpInfo->size(); ++i) { - auto interpInfoElem = (*interpInfo)[i]; - - if (interpInfoElem.isPerPrimitive) - ++numPrimInterp; - - if (!m_pipelineState->isWholePipeline() && interpInfoElem.loc == InvalidFsInterpInfo.loc) { - appendConfig(mmSPI_PS_INPUT_CNTL_0 + i, i); - continue; - } - if ((interpInfoElem.loc == InvalidFsInterpInfo.loc && interpInfoElem.flat == InvalidFsInterpInfo.flat && - interpInfoElem.custom == InvalidFsInterpInfo.custom && interpInfoElem.is16bit == InvalidFsInterpInfo.is16bit)) - interpInfoElem.loc = i; - - regSPI_PS_INPUT_CNTL_0 spiPsInputCntl = {}; - // NOTE: Flat shading flag is only set for per-vertex parameter. - spiPsInputCntl.bits.FLAT_SHADE = interpInfoElem.flat && !interpInfoElem.isPerPrimitive; - spiPsInputCntl.bits.OFFSET = interpInfoElem.loc; - if (m_gfxIp.major >= 11 && interpInfoElem.isPerPrimitive) { - // NOTE: HW allocates and manages attribute ring based on the register fields: VS_EXPORT_COUNT and - // PRIM_EXPORT_COUNT. When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even - // though this is not what we want. Hence, we should reserve param0 as a dummy vertex attribute and all - // primitive attributes are moved after it. - const bool hasNoVertexAttrib = resUsage->inOutUsage.inputMapLocCount == 0; // No vertex attribute - if (hasNoVertexAttrib) - ++spiPsInputCntl.bits.OFFSET; - - spiPsInputCntl.gfx11.PRIM_ATTR = true; - } - - if (interpInfoElem.custom) { - // NOTE: Force parameter cache data to be read in passthrough mode. 
- static const unsigned PassThroughMode = (1 << 5); - spiPsInputCntl.bits.FLAT_SHADE = true; - spiPsInputCntl.bits.OFFSET |= PassThroughMode; - } else if (!interpInfoElem.flat && interpInfoElem.is16bit) { - spiPsInputCntl.bits.FP16_INTERP_MODE = true; - spiPsInputCntl.bits.ATTR0_VALID = interpInfoElem.attr0Valid; - spiPsInputCntl.bits.ATTR1_VALID = interpInfoElem.attr1Valid; - } - - constexpr unsigned UseDefaultVal = (1 << 5); - if (pointCoordLoc == i) { - spiPsInputCntl.bits.PT_SPRITE_TEX = true; - - // NOTE: Set the offset value to force hardware to select input defaults (no VS match). - spiPsInputCntl.bits.OFFSET = UseDefaultVal; - } - - // NOTE: Set SPI_PS_INPUT_CNTL_* here, but the register can still be changed later, - // when it becomes known that gl_ViewportIndex is not used and fields OFFSET and FLAT_SHADE - // can be amended. - appendConfig(mmSPI_PS_INPUT_CNTL_0 + i, spiPsInputCntl.u32All); - } - - unsigned numInterp = resUsage->inOutUsage.fs.interpInfo.size() - numPrimInterp; - if (m_gfxIp.major >= 11) { - // NOTE: For GFX11+, vertex attributes and primitive attributes are counted together. The field - // SPI_PS_INPUT_CNTL.PRIM_ATTR is used to differentiate them. 
- numInterp = resUsage->inOutUsage.fs.interpInfo.size(); - } - SET_REG_FIELD(&config->psRegs, SPI_PS_IN_CONTROL, NUM_INTERP, numInterp); - if (m_gfxIp.isGfx(10, 3)) - SET_REG_GFX10_3_PLUS_EXCLUSIVE_FIELD(&config->psRegs, SPI_PS_IN_CONTROL, NUM_PRIM_INTERP, numPrimInterp); - - if (pointCoordLoc != InvalidValue) { - SET_REG_FIELD(&config->psRegs, SPI_INTERP_CONTROL_0, PNT_SPRITE_ENA, true); - SET_REG_FIELD(&config->psRegs, SPI_INTERP_CONTROL_0, PNT_SPRITE_OVRD_X, SPI_PNT_SPRITE_SEL_S); - SET_REG_FIELD(&config->psRegs, SPI_INTERP_CONTROL_0, PNT_SPRITE_OVRD_Y, SPI_PNT_SPRITE_SEL_T); - SET_REG_FIELD(&config->psRegs, SPI_INTERP_CONTROL_0, PNT_SPRITE_OVRD_Z, SPI_PNT_SPRITE_SEL_0); - SET_REG_FIELD(&config->psRegs, SPI_INTERP_CONTROL_0, PNT_SPRITE_OVRD_W, SPI_PNT_SPRITE_SEL_1); - } - - if (m_pipelineState->getPalAbiVersion() >= 456) { - setPsUsesUavs(resUsage->resourceWrite || resUsage->resourceRead); - setPsWritesUavs(resUsage->resourceWrite); - setPsWritesDepth(builtInUsage.fragDepth); - } else - setPsUsesUavs(static_cast(resUsage->resourceWrite)); - - setPsSampleMask(builtInUsage.sampleMaskIn | builtInUsage.sampleMask); - - const unsigned loadCollisionWaveId = GET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, LOAD_COLLISION_WAVEID); - const unsigned loadIntrawaveCollision = - GET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_RSRC2_PS, LOAD_INTRAWAVE_COLLISION); - - SET_REG_CORE_FIELD(&config->psRegs, PA_SC_SHADER_CONTROL, LOAD_COLLISION_WAVEID, loadCollisionWaveId); - SET_REG_CORE_FIELD(&config->psRegs, PA_SC_SHADER_CONTROL, LOAD_INTRAWAVE_COLLISION, loadIntrawaveCollision); - - setNumAvailSgprs(Util::Abi::HardwareStage::Ps, resUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Ps, resUsage->numVgprsAvailable); - - const unsigned checksum = setShaderHash(shaderStage); - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) - SET_REG_FIELD(&config->psRegs, SPI_SHADER_PGM_CHKSUM_PS, CHECKSUM, checksum); -} - -// 
===================================================================================================================== -// Builds register configuration for mesh shader. -// -// @param shaderStage : Current shader stage (from API side) -// @param [out] config : Register configuration for mesh-shader-specific pipeline -template void ConfigBuilder::buildMeshRegConfig(ShaderStageEnum shaderStage, T *config) { - assert(shaderStage == ShaderStage::Mesh); - - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - - const auto resUsage = m_pipelineState->getShaderResourceUsage(shaderStage); - const auto intfData = m_pipelineState->getShaderInterfaceData(shaderStage); - - const auto &builtInUsage = resUsage->builtInUsage.mesh; - - const auto &meshMode = m_pipelineState->getShaderModes()->getMeshShaderMode(); - const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; - - SET_REG_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, MAX_PRIMGRP_IN_WAVE, 2); - - SET_REG_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, PRIMGEN_EN, true); - - const unsigned waveSize = m_pipelineState->getShaderWaveSize(shaderStage); - if (waveSize == 32) - SET_REG_GFX10_PLUS_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, GS_W32_EN, true); - - if (m_gfxIp.major <= 11) - SET_REG_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, ES_EN, ES_STAGE_REAL); - SET_REG_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, GS_EN, GS_STAGE_ON); - const unsigned gsFastLaunch = m_gfxIp.major == 11 ? 
0x2 : 0x1; // GFX11 sets GS fast launch mode to 0x2 - SET_REG_GFX09_1X_PLUS_FIELD(&config->meshRegs, VGT_SHADER_STAGES_EN, GS_FAST_LAUNCH, gsFastLaunch); - - // - // Build ES-GS specific configuration - // - unsigned floatMode = setupFloatingPointMode(shaderStage); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC1_GS, FLOAT_MODE, floatMode); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC1_GS, DX10_CLAMP, true); // Follow PAL setting - - unsigned userDataCount = intfData->userDataCount; - - const auto &shaderOptions = m_pipelineState->getShaderOptions(shaderStage); - bool wgpMode = m_pipelineState->getShaderWgpMode(shaderStage); - - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC1_GS, DEBUG_MODE, shaderOptions.debugMode); - SET_REG_GFX10_PLUS_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC1_GS, MEM_ORDERED, true); - SET_REG_GFX10_PLUS_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC1_GS, WGP_MODE, wgpMode); - - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC2_GS, TRAP_PRESENT, shaderOptions.trapPresent); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR, userDataCount); - - const bool userSgprMsb = (userDataCount > 31); - SET_REG_GFX10_PLUS_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR_MSB, userSgprMsb); - - const unsigned ldsSizeDwordGranularityShift = - m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - const unsigned ldsSizeDwordGranularity = 1u << ldsSizeDwordGranularityShift; - - unsigned ldsSizeInDwords = calcFactor.gsOnChipLdsSize; - ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); - - const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); - setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - - if (m_gfxIp.major >= 11) { - // Pixel wait sync+ - SET_REG_GFX11_FIELD(&config->meshRegs, SPI_SHADER_PGM_RSRC4_GS, IMAGE_OP, resUsage->useImageOp); - 
} - - unsigned maxVertOut = std::max(1u, static_cast(meshMode.outputVertices)); - SET_REG_FIELD(&config->meshRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut); - - SET_REG_FIELD(&config->meshRegs, VGT_GS_MODE, MODE, GS_SCENARIO_G); - SET_REG_FIELD(&config->meshRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF); - SET_REG_FIELD(&config->meshRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); - SET_REG_FIELD(&config->meshRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, true); - - assert(calcFactor.esVertsPerSubgroup == 1 && calcFactor.gsPrimsPerSubgroup == 1); - SET_REG_FIELD(&config->meshRegs, VGT_GS_ONCHIP_CNTL, ES_VERTS_PER_SUBGRP, 1); - SET_REG_FIELD(&config->meshRegs, VGT_GS_ONCHIP_CNTL, GS_PRIMS_PER_SUBGRP, 1); - SET_REG_FIELD(&config->meshRegs, VGT_GS_ONCHIP_CNTL, GS_INST_PRIMS_IN_SUBGRP, 1); - setNggSubgroupSize(1); - - SET_REG_FIELD(&config->meshRegs, VGT_GS_PER_VS, GS_PER_VS, GsThreadsPerVsThread); - - VGT_GS_OUTPRIM_TYPE gsOutputPrimitiveType = POINTLIST; - switch (meshMode.outputPrimitive) { - case OutputPrimitives::Points: - gsOutputPrimitiveType = POINTLIST; - break; - case OutputPrimitives::Lines: - gsOutputPrimitiveType = LINESTRIP; - break; - case OutputPrimitives::Triangles: - gsOutputPrimitiveType = TRISTRIP; - break; - default: - llvm_unreachable("Unknown primitive type!"); - break; - } - SET_REG_FIELD(&config->meshRegs, VGT_GS_OUT_PRIM_TYPE, OUTPRIM_TYPE, gsOutputPrimitiveType); - - const unsigned maxVertsPerSubgroup = std::min(meshMode.outputVertices, NggMaxThreadsPerSubgroup); - SET_REG_FIELD(&config->meshRegs, GE_MAX_OUTPUT_PER_SUBGROUP, MAX_VERTS_PER_SUBGROUP, maxVertsPerSubgroup); - - // - // Build VS specific configuration - // - setupPaSpecificRegisters(&config->meshRegs); - - // - // Build NGG specific configuration - // - assert(calcFactor.primAmpFactor >= 1); - SET_REG_FIELD(&config->meshRegs, GE_NGG_SUBGRP_CNTL, PRIM_AMP_FACTOR, calcFactor.primAmpFactor); - SET_REG_FIELD(&config->meshRegs, GE_NGG_SUBGRP_CNTL, THDS_PER_SUBGRP, 
calcFactor.primAmpFactor); - - const bool enableMultiView = m_pipelineState->getInputAssemblyState().multiView != MultiViewMode::Disable; - bool hasPrimitivePayload = - builtInUsage.layer || builtInUsage.viewportIndex || builtInUsage.primitiveShadingRate || enableMultiView; - if (m_gfxIp.major < 11) - hasPrimitivePayload |= builtInUsage.primitiveId; - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_IDX_FORMAT, IDX0_EXPORT_FORMAT, - hasPrimitivePayload ? SPI_SHADER_2COMP : SPI_SHADER_1COMP); - SET_REG_GFX10_PLUS_FIELD(&config->meshRegs, VGT_DRAW_PAYLOAD_CNTL, EN_PRIM_PAYLOAD, hasPrimitivePayload); - - if (m_gfxIp.major >= 11) { - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_DIM, MESHLET_NUM_THREAD_X, meshMode.workgroupSizeX - 1); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_DIM, MESHLET_NUM_THREAD_Y, meshMode.workgroupSizeY - 1); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_DIM, MESHLET_NUM_THREAD_Z, meshMode.workgroupSizeZ - 1); - // NOTE: If row export for mesh shader is enabled, the thread group size is set according to dimensions of work - // group. Otherwise, it is set according to actual primitive amplification factor. - const unsigned threadGroupSize = m_pipelineState->enableMeshRowExport() - ? 
meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ - : calcFactor.primAmpFactor; - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_DIM, MESHLET_THREADGROUP_SIZE, threadGroupSize - 1); - - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_EXP_ALLOC, MAX_EXP_VERTS, meshMode.outputVertices); - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_GS_MESHLET_EXP_ALLOC, MAX_EXP_PRIMS, meshMode.outputPrimitives); - } - - setWaveFrontSize(Util::Abi::HardwareStage::Gs, waveSize); - - setNumAvailSgprs(Util::Abi::HardwareStage::Gs, resUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Gs, resUsage->numVgprsAvailable); - - const unsigned checksum = setShaderHash(ShaderStage::Mesh); - if (m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) - SET_REG_FIELD(&config->meshRegs, SPI_SHADER_PGM_CHKSUM_GS, CHECKSUM, checksum); - - // - // Set up IA_MULTI_VGT_PARAM - // - regIA_MULTI_VGT_PARAM iaMultiVgtParam = {}; - - static constexpr unsigned PrimGroupSize = 128; - iaMultiVgtParam.bits.PRIMGROUP_SIZE = PrimGroupSize - 1; - - SET_REG(&config->meshRegs, IA_MULTI_VGT_PARAM_PIPED, iaMultiVgtParam.u32All); -} - -// ===================================================================================================================== -// Builds register configuration for compute/task shader. 
-// -// @param shaderStage : Current shader stage (from API side) -// @param [out] config : Register configuration for compute -void ConfigBuilder::buildCsRegConfig(ShaderStageEnum shaderStage, CsRegConfig *config) { - assert(shaderStage == ShaderStage::Compute || shaderStage == ShaderStage::Task); - - const auto intfData = m_pipelineState->getShaderInterfaceData(shaderStage); - const auto &shaderOptions = m_pipelineState->getShaderOptions(shaderStage); - const auto resUsage = m_pipelineState->getShaderResourceUsage(shaderStage); - const auto &computeMode = m_pipelineState->getShaderModes()->getComputeShaderMode(); - - unsigned workgroupSizes[3] = {}; - if (shaderStage == ShaderStage::Compute) { - const auto &builtInUsage = resUsage->builtInUsage.cs; - if (builtInUsage.foldWorkgroupXY) { - workgroupSizes[0] = computeMode.workgroupSizeX * computeMode.workgroupSizeY; - workgroupSizes[1] = computeMode.workgroupSizeZ; - workgroupSizes[2] = 1; - } else { - workgroupSizes[0] = computeMode.workgroupSizeX; - workgroupSizes[1] = computeMode.workgroupSizeY; - workgroupSizes[2] = computeMode.workgroupSizeZ; - } - } else { - assert(shaderStage == ShaderStage::Task); - workgroupSizes[0] = computeMode.workgroupSizeX; - workgroupSizes[1] = computeMode.workgroupSizeY; - workgroupSizes[2] = computeMode.workgroupSizeZ; - } - - unsigned floatMode = setupFloatingPointMode(shaderStage); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC1, FLOAT_MODE, floatMode); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC1, DX10_CLAMP, true); // Follow PAL setting - SET_REG_FIELD(config, COMPUTE_PGM_RSRC1, DEBUG_MODE, shaderOptions.debugMode); - - if (m_gfxIp.major >= 10) { - bool wgpMode = m_pipelineState->getShaderWgpMode(shaderStage); - - SET_REG_GFX10_PLUS_FIELD(config, COMPUTE_PGM_RSRC1, MEM_ORDERED, true); - SET_REG_GFX10_PLUS_FIELD(config, COMPUTE_PGM_RSRC1, WGP_MODE, wgpMode); - unsigned waveSize = m_pipelineState->getShaderWaveSize(shaderStage); - assert(waveSize == 32 || waveSize == 64); - 
setWaveFrontSize(Util::Abi::HardwareStage::Cs, waveSize); - } - - // Set registers based on shader interface data - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TRAP_PRESENT, shaderOptions.trapPresent); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, USER_SGPR, intfData->userDataCount); - - Function *entryFunc = nullptr; - for (Function &func : *m_module) { - // Only entrypoint compute shader may have the function attribute for workgroup id optimization. - if (isShaderEntryPoint(&func)) { - entryFunc = &func; - break; - } - } - bool hasWorkgroupIdX = !entryFunc || !entryFunc->hasFnAttribute("amdgpu-no-workgroup-id-x"); - bool hasWorkgroupIdY = !entryFunc || !entryFunc->hasFnAttribute("amdgpu-no-workgroup-id-y"); - bool hasWorkgroupIdZ = !entryFunc || !entryFunc->hasFnAttribute("amdgpu-no-workgroup-id-z"); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TGID_X_EN, hasWorkgroupIdX); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TGID_Y_EN, hasWorkgroupIdY); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TGID_Z_EN, hasWorkgroupIdZ); - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TG_SIZE_EN, true); - - // 0 = X, 1 = XY, 2 = XYZ - unsigned tidigCompCnt = 0; - if (workgroupSizes[2] > 1) - tidigCompCnt = 2; - else if (workgroupSizes[1] > 1) - tidigCompCnt = 1; - SET_REG_FIELD(config, COMPUTE_PGM_RSRC2, TIDIG_COMP_CNT, tidigCompCnt); - - if (m_gfxIp.major >= 11) { - // Pixel wait sync+ - SET_REG_GFX11_FIELD(config, COMPUTE_PGM_RSRC3, IMAGE_OP, resUsage->useImageOp); - } - - SET_REG_FIELD(config, COMPUTE_NUM_THREAD_X, NUM_THREAD_FULL, workgroupSizes[0]); - SET_REG_FIELD(config, COMPUTE_NUM_THREAD_Y, NUM_THREAD_FULL, workgroupSizes[1]); - SET_REG_FIELD(config, COMPUTE_NUM_THREAD_Z, NUM_THREAD_FULL, workgroupSizes[2]); - - setThreadgroupDimensions(workgroupSizes); - - setNumAvailSgprs(Util::Abi::HardwareStage::Cs, resUsage->numSgprsAvailable); - setNumAvailVgprs(Util::Abi::HardwareStage::Cs, resUsage->numVgprsAvailable); - - const unsigned checksum = setShaderHash(shaderStage); - if 
(m_pipelineState->getTargetInfo().getGpuProperty().supportShaderPowerProfiling) - SET_REG_FIELD(config, COMPUTE_SHADER_CHKSUM, CHECKSUM, checksum); -} - -// ===================================================================================================================== -// Sets up the register value for VGT_TF_PARAM. -// -// @param [out] config : Register configuration for local-hull-shader-specific pipeline -void ConfigBuilder::setupVgtTfParam(LsHsRegConfig *config) { - unsigned primType = InvalidValue; - unsigned partition = InvalidValue; - unsigned topology = InvalidValue; - - const auto &tessMode = m_pipelineState->getShaderModes()->getTessellationMode(); - - assert(tessMode.primitiveMode != PrimitiveMode::Unknown); - if (tessMode.primitiveMode == PrimitiveMode::Isolines) - primType = TESS_ISOLINE; - else if (tessMode.primitiveMode == PrimitiveMode::Triangles) - primType = TESS_TRIANGLE; - else if (tessMode.primitiveMode == PrimitiveMode::Quads) - primType = TESS_QUAD; - assert(primType != InvalidValue); - - assert(tessMode.vertexSpacing != VertexSpacing::Unknown); - if (tessMode.vertexSpacing == VertexSpacing::Equal) - partition = PART_INTEGER; - else if (tessMode.vertexSpacing == VertexSpacing::FractionalOdd) - partition = PART_FRAC_ODD; - else if (tessMode.vertexSpacing == VertexSpacing::FractionalEven) - partition = PART_FRAC_EVEN; - assert(partition != InvalidValue); - - assert(tessMode.vertexOrder != VertexOrder::Unknown); - if (tessMode.pointMode) - topology = OUTPUT_POINT; - else if (tessMode.primitiveMode == PrimitiveMode::Isolines) - topology = OUTPUT_LINE; - else if (tessMode.vertexOrder == VertexOrder::Cw) - topology = OUTPUT_TRIANGLE_CW; - else if (tessMode.vertexOrder == VertexOrder::Ccw) - topology = OUTPUT_TRIANGLE_CCW; - - if (m_pipelineState->getInputAssemblyState().switchWinding) { - if (topology == OUTPUT_TRIANGLE_CW) - topology = OUTPUT_TRIANGLE_CCW; - else if (topology == OUTPUT_TRIANGLE_CCW) - topology = OUTPUT_TRIANGLE_CW; - } - - 
assert(topology != InvalidValue); - - SET_REG_FIELD(config, VGT_TF_PARAM, TYPE, primType); - SET_REG_FIELD(config, VGT_TF_PARAM, PARTITIONING, partition); - SET_REG_FIELD(config, VGT_TF_PARAM, TOPOLOGY, topology); - SET_REG_FIELD(config, VGT_TF_PARAM, DISTRIBUTION_MODE, TRAPEZOIDS); -} - -// ===================================================================================================================== -// Set up PA-specific (primitive assembler) registers. -// -// @param [out] config : Register configuration -template void ConfigBuilder::setupPaSpecificRegisters(T *config) { - const bool hasTs = m_pipelineState->hasShaderStage(ShaderStage::TessControl) || - m_pipelineState->hasShaderStage(ShaderStage::TessEval); - const bool hasGs = m_pipelineState->hasShaderStage(ShaderStage::Geometry); - const bool meshPipeline = - m_pipelineState->hasShaderStage(ShaderStage::Task) || m_pipelineState->hasShaderStage(ShaderStage::Mesh); - - bool rasterizerDiscardEnable = m_pipelineState->getRasterizerState().rasterizerDiscardEnable; - SET_REG_FIELD(config, PA_CL_CLIP_CNTL, DX_LINEAR_ATTR_CLIP_ENA, true); - SET_REG_FIELD(config, PA_CL_CLIP_CNTL, DX_RASTERIZATION_KILL, rasterizerDiscardEnable); - - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_X_SCALE_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_X_OFFSET_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_Y_SCALE_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_Y_OFFSET_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_Z_SCALE_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VPORT_Z_OFFSET_ENA, true); - SET_REG_FIELD(config, PA_CL_VTE_CNTL, VTX_W0_FMT, true); - - SET_REG_FIELD(config, PA_SU_VTX_CNTL, PIX_CENTER, 1); - SET_REG_FIELD(config, PA_SU_VTX_CNTL, ROUND_MODE, 2); // Round to even - SET_REG_FIELD(config, PA_SU_VTX_CNTL, QUANT_MODE, 5); // Use 8-bit fractions - - // Stage-specific processing - bool usePointSize = false; - bool useEdgeFlag = false; - bool useLayer = false; - bool 
useViewportIndex = false; - bool useShadingRate = false; - unsigned clipDistanceCount = 0; - unsigned cullDistanceCount = 0; - - unsigned expCount = 0; - unsigned primExpCount = 0; - - if (meshPipeline) { - // Mesh pipeline - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh); - const auto &builtInUsage = resUsage->builtInUsage.mesh; - - usePointSize = builtInUsage.pointSize; - useLayer = builtInUsage.layer; - useViewportIndex = builtInUsage.viewportIndex; - useShadingRate = builtInUsage.primitiveShadingRate; - clipDistanceCount = builtInUsage.clipDistance; - cullDistanceCount = builtInUsage.cullDistance; - - expCount = resUsage->inOutUsage.expCount; - primExpCount = resUsage->inOutUsage.primExpCount; - } else { - bool usePrimitiveId = false; - - if (hasGs) { - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry); - const auto &builtInUsage = resUsage->builtInUsage.gs; - - usePointSize = builtInUsage.pointSize; - usePrimitiveId = builtInUsage.primitiveIdIn; - useLayer = builtInUsage.layer; - useViewportIndex = builtInUsage.viewportIndex; - useShadingRate = builtInUsage.primitiveShadingRate; - clipDistanceCount = builtInUsage.clipDistance; - cullDistanceCount = builtInUsage.cullDistance; - - expCount = resUsage->inOutUsage.expCount; - - // NOTE: For ES-GS merged shader, the actual use of primitive ID should take both ES and GS into consideration. 
- if (hasTs) { - const auto &tesBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->builtInUsage.tes; - usePrimitiveId = usePrimitiveId || tesBuiltInUsage.primitiveId; - } else { - const auto &vsBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex)->builtInUsage.vs; - usePrimitiveId = usePrimitiveId || vsBuiltInUsage.primitiveId; - } - } else if (hasTs) { - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); - const auto &builtInUsage = resUsage->builtInUsage.tes; - - usePointSize = builtInUsage.pointSize; - useLayer = builtInUsage.layer; - useViewportIndex = builtInUsage.viewportIndex; - clipDistanceCount = builtInUsage.clipDistance; - cullDistanceCount = builtInUsage.cullDistance; - - expCount = resUsage->inOutUsage.expCount; - } else { - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); - const auto &builtInUsage = resUsage->builtInUsage.vs; - - usePointSize = builtInUsage.pointSize; - useEdgeFlag = builtInUsage.edgeFlag; - usePrimitiveId = builtInUsage.primitiveId; - useLayer = builtInUsage.layer; - useViewportIndex = builtInUsage.viewportIndex; - useShadingRate = builtInUsage.primitiveShadingRate; - clipDistanceCount = builtInUsage.clipDistance; - cullDistanceCount = builtInUsage.cullDistance; - - expCount = resUsage->inOutUsage.expCount; - } - - useLayer = useLayer || m_pipelineState->getInputAssemblyState().multiView != MultiViewMode::Disable; - if (m_pipelineState->getInputAssemblyState().multiView == MultiViewMode::PerView) - useViewportIndex = true; - - if (usePrimitiveId) { - SET_REG_FIELD(config, VGT_PRIMITIVEID_EN, PRIMITIVEID_EN, true); - - if (m_pipelineState->getNggControl()->enableNgg) { - // NOTE: If primitive ID is used and there is no GS present, the field NGG_DISABLE_PROVOK_REUSE must be - // set to ensure provoking vertex reuse is disabled in the GE. 
- if (!m_hasGs) { - SET_REG_FIELD(config, VGT_PRIMITIVEID_EN, NGG_DISABLE_PROVOK_REUSE, true); - } - } - } - } - - if (expCount == 0 && primExpCount == 0) { - // No generic output is present - SET_REG_GFX10_PLUS_FIELD(config, SPI_VS_OUT_CONFIG, NO_PC_EXPORT, true); - } else { - if (expCount > 0) - SET_REG_FIELD(config, SPI_VS_OUT_CONFIG, VS_EXPORT_COUNT, expCount - 1); - - if (primExpCount > 0) { - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - SET_REG_GFX10_3_PLUS_EXCLUSIVE_FIELD(config, SPI_VS_OUT_CONFIG, PRIM_EXPORT_COUNT, primExpCount); - } - } - - setUsesViewportArrayIndex(useViewportIndex); - - bool disableVertexReuse = m_pipelineState->getInputAssemblyState().disableVertexReuse; - disableVertexReuse |= meshPipeline; // Mesh pipeline always disable vertex reuse - - // According to the IA_VGT_Spec, it is only legal to enable vertex reuse when we're using viewport array - // index if each GS, TES, or VS invocation emits the same viewport array index for each vertex and we set - // VTE_VPORT_PROVOKE_DISABLE. - if (useViewportIndex) { - // TODO: In the future, we can only disable vertex reuse only if viewport array index is emitted divergently - // for each vertex. - disableVertexReuse = true; - SET_REG_FIELD(config, PA_CL_CLIP_CNTL, VTE_VPORT_PROVOKE_DISABLE, true); - } else { - SET_REG_FIELD(config, PA_CL_CLIP_CNTL, VTE_VPORT_PROVOKE_DISABLE, false); - } - - SET_REG_FIELD(config, VGT_REUSE_OFF, REUSE_OFF, disableVertexReuse || m_pipelineState->enableSwXfb()); - - bool miscExport = usePointSize || useEdgeFlag; - if (!meshPipeline) { - // NOTE: Those built-ins are exported through primitive payload for mesh pipeline rather than vertex position data. 
- miscExport |= useLayer || useViewportIndex || useShadingRate; - } - - if (miscExport) { - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, USE_VTX_POINT_SIZE, usePointSize); - - if (meshPipeline) { - if (useShadingRate) { - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - SET_REG_GFX10_3_PLUS_FIELD(config, PA_CL_VS_OUT_CNTL, BYPASS_VTX_RATE_COMBINER, true); - } - } else { - // NOTE: Those built-ins are exported through primitive payload for mesh pipeline rather than vertex position - // data. - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, USE_VTX_RENDER_TARGET_INDX, useLayer); - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, USE_VTX_VIEWPORT_INDX, useViewportIndex); - - if (useShadingRate) { - assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - SET_REG_GFX10_3_PLUS_FIELD(config, PA_CL_VS_OUT_CNTL, USE_VTX_VRS_RATE, true); - SET_REG_GFX10_3_PLUS_FIELD(config, PA_CL_VS_OUT_CNTL, BYPASS_PRIM_RATE_COMBINER, true); - } - } - - if (useEdgeFlag) { - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, USE_VTX_EDGE_FLAG, true); - } - - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, VS_OUT_MISC_VEC_ENA, true); - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, VS_OUT_MISC_SIDE_BUS_ENA, true); - } - - if (clipDistanceCount > 0 || cullDistanceCount > 0) { - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, VS_OUT_CCDIST0_VEC_ENA, true); - if (clipDistanceCount + cullDistanceCount > 4) { - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, VS_OUT_CCDIST1_VEC_ENA, true); - } - - unsigned clipDistanceMask = (1 << clipDistanceCount) - 1; - unsigned cullDistanceMask = ((1 << cullDistanceCount) - 1) << clipDistanceCount; - - // Set fields CLIP_DIST_ENA_0 ~ CLIP_DIST_ENA_7 and CULL_DIST_ENA_0 ~ CULL_DIST_ENA_7 - unsigned paClVsOutCntl = GET_REG(config, PA_CL_VS_OUT_CNTL); - - // Note: Point primitives are only affected by the cull mask, so enable culling also based on clip distances - unsigned finalMask = clipDistanceMask | cullDistanceMask; - paClVsOutCntl |= clipDistanceMask | (finalMask << 8); - SET_REG(config, 
PA_CL_VS_OUT_CNTL, paClVsOutCntl); - - // On 10.3+ all auxiliary position exports are optimized, not just the misc exports. - if (m_gfxIp >= GfxIpVersion{10, 3}) - SET_REG_FIELD(config, PA_CL_VS_OUT_CNTL, VS_OUT_MISC_SIDE_BUS_ENA, true); - } - - unsigned posCount = 1; // gl_Position is always exported - if (miscExport) - ++posCount; - - if (clipDistanceCount + cullDistanceCount > 0) { - ++posCount; - if (clipDistanceCount + cullDistanceCount > 4) - ++posCount; - } - - SET_REG_FIELD(config, SPI_SHADER_POS_FORMAT, POS0_EXPORT_FORMAT, SPI_SHADER_4COMP); - if (posCount > 1) { - SET_REG_FIELD(config, SPI_SHADER_POS_FORMAT, POS1_EXPORT_FORMAT, SPI_SHADER_4COMP); - } - if (posCount > 2) { - SET_REG_FIELD(config, SPI_SHADER_POS_FORMAT, POS2_EXPORT_FORMAT, SPI_SHADER_4COMP); - } - if (posCount > 3) { - SET_REG_FIELD(config, SPI_SHADER_POS_FORMAT, POS3_EXPORT_FORMAT, SPI_SHADER_4COMP); - } -} - -} // namespace Gfx9 - -} // namespace lgc diff --git a/lgc/patch/Gfx9ConfigBuilder.h b/lgc/patch/Gfx9ConfigBuilder.h deleted file mode 100644 index 28d0b6bdde..0000000000 --- a/lgc/patch/Gfx9ConfigBuilder.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file Gfx9ConfigBuilder.h - * @brief LLPC header file: contains declaration of class lgc::Gfx9::ConfigBuilder. - *********************************************************************************************************************** - */ -#pragma once - -#include "ConfigBuilderBase.h" -#include "Gfx9Chip.h" - -namespace lgc { - -namespace Gfx9 { - -// ===================================================================================================================== -// Represents the builder to generate register configurations for GFX9-generation chips. 
-class ConfigBuilder : public ConfigBuilderBase { -public: - ConfigBuilder(llvm::Module *module, PipelineState *pipelineState) : ConfigBuilderBase(module, pipelineState) {} - - void buildPalMetadata(); - - void buildPipelineVsFsRegConfig(); - void buildPipelineVsTsFsRegConfig(); - void buildPipelineVsGsFsRegConfig(); - void buildPipelineVsTsGsFsRegConfig(); - - void buildPipelineNggVsFsRegConfig(); - void buildPipelineNggVsTsFsRegConfig(); - void buildPipelineNggVsGsFsRegConfig(); - void buildPipelineNggVsTsGsFsRegConfig(); - - void buildPipelineMeshFsConfig(); - void buildPipelineTaskMeshFsConfig(); - - void buildPipelineCsRegConfig(); - -private: - ConfigBuilder() = delete; - ConfigBuilder(const ConfigBuilder &) = delete; - ConfigBuilder &operator=(const ConfigBuilder &) = delete; - - template void buildVsRegConfig(ShaderStageEnum shaderStage, T *config); - template void buildLsHsRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config); - template void buildEsGsRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config); - template - void buildPrimShaderRegConfig(ShaderStageEnum shaderStage1, ShaderStageEnum shaderStage2, T *config); - template void buildPsRegConfig(ShaderStageEnum shaderStage, T *config); - template void buildMeshRegConfig(ShaderStageEnum shaderStage, T *config); - void buildCsRegConfig(ShaderStageEnum shaderStage, CsRegConfig *config); - - void setupVgtTfParam(LsHsRegConfig *config); - template void setupPaSpecificRegisters(T *config); -}; - -} // namespace Gfx9 - -} // namespace lgc diff --git a/lgc/patch/LowerCooperativeMatrix.cpp b/lgc/patch/LowerCooperativeMatrix.cpp index cf880b63a7..5e6c615ddf 100644 --- a/lgc/patch/LowerCooperativeMatrix.cpp +++ b/lgc/patch/LowerCooperativeMatrix.cpp @@ -482,8 +482,8 @@ Value *LowerCooperativeMatrix::cooperativeMatrixLoadInternal(Value *dataPtr, Val bool isTemporal = memoryAccess & (unsigned)(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); auto props 
= getTypeProperties(elemType, layout); - auto addrInfo = computeAddressing(layout, elemType, waveSize, stride, isColMajor, insertPos); + auto addrInfo = computeAddressing(layout, elemType, waveSize, stride, isColMajor, insertPos); Value *vecVal = PoisonValue::get(FixedVectorType::get(elemTy, props.numFlatElements)); for (unsigned idx = 0; idx < props.numFlatElements; ++idx) { Value *macroOffset = builder.CreateMul(addrInfo.macroStep, builder.getInt32(idx / addrInfo.microCount)); diff --git a/lgc/patch/LowerDesc.cpp b/lgc/patch/LowerDesc.cpp index 7550d17a83..9a428cbd00 100644 --- a/lgc/patch/LowerDesc.cpp +++ b/lgc/patch/LowerDesc.cpp @@ -52,7 +52,11 @@ PreservedAnalyses LowerDesc::run(Module &module, ModuleAnalysisManager &analysis PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); m_pipelineState = pipelineState; - static const auto visitor = llvm_dialects::VisitorBuilder().add(&LowerDesc::visitLoadBufferDesc).build(); + static const auto visitor = llvm_dialects::VisitorBuilder() + .add(&LowerDesc::visitLoadBufferAddr) + .add(&LowerDesc::visitLoadBufferDesc) + .add(&LowerDesc::visitLoadStridedBufferDesc) + .build(); visitor.visit(*this, module); @@ -64,6 +68,28 @@ PreservedAnalyses LowerDesc::run(Module &module, ModuleAnalysisManager &analysis return PreservedAnalyses::allInSet(); } +// ===================================================================================================================== +// Lower a load.buffer.addr operation. The result is an i64. +// +// @param op : the operation +void LowerDesc::visitLoadBufferAddr(LoadBufferAddrOp &op) { + BuilderImpl builder(m_pipelineState); + builder.setShaderStage(getShaderStage(op.getFunction())); + builder.SetInsertPoint(&op); + + // BufferFlagAddress only supports the case where the descriptor is a compact descriptor. This op supports + // normal descriptors, extracting the 48-bit address out of the descriptor. 
+ unsigned flags = op.getFlags() & ~Builder::BufferFlagAddress; + Value *desc = builder.CreateBufferDesc(op.getDescSet(), op.getBinding(), op.getDescIndex(), flags); + m_toErase.push_back(&op); + + // Extract 48-bit address out of <4 x i32> descriptor, resulting in an i64. + Value *addr = builder.CreateShuffleVector(desc, desc, {0, 1}); + addr = builder.CreateBitCast(addr, builder.getInt64Ty()); + addr = builder.CreateAnd(addr, builder.getInt64(0x0000ffffffffffffULL)); + op.replaceAllUsesWith(addr); +} + // ===================================================================================================================== // Lower a load.buffer.desc operation // @@ -85,4 +111,27 @@ void LowerDesc::visitLoadBufferDesc(LoadBufferDescOp &op) { // Convert to fat pointer. op.replaceAllUsesWith(builder.create(desc)); } + +// ===================================================================================================================== +// Lower a load.strided.buffer.desc operation +// +// @param op : the operation +void LowerDesc::visitLoadStridedBufferDesc(LoadStridedBufferDescOp &op) { + BuilderImpl builder(m_pipelineState); + builder.setShaderStage(getShaderStage(op.getFunction())); + builder.SetInsertPoint(&op); + + unsigned flags = op.getFlags(); + // Anyone who wants to get a 64-bit buffer descriptor address should call `CreateBufferDesc` directly. (This is only + // available in LGC as we don't expect front-end would required such usage.) 
+ assert(!(flags & Builder::BufferFlagAddress) && + "Returning a 64-bit address is unsupported by lgc.load.strided.buffer.desc"); + + Value *desc = + builder.CreateStridedBufferDesc(op.getDescSet(), op.getBinding(), op.getDescIndex(), flags, op.getStride()); + + m_toErase.push_back(&op); + + op.replaceAllUsesWith(builder.create(desc)); +} } // namespace lgc diff --git a/lgc/patch/MeshTaskShader.cpp b/lgc/patch/MeshTaskShader.cpp index 4242a871f5..8666090720 100644 --- a/lgc/patch/MeshTaskShader.cpp +++ b/lgc/patch/MeshTaskShader.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ #include "MeshTaskShader.h" -#include "Gfx9Chip.h" #include "ShaderMerger.h" #include "lgc/patch/Patch.h" #include "lgc/util/Debug.h" @@ -70,16 +69,14 @@ MeshTaskShader::~MeshTaskShader() { // @param pipelineState : Pipeline state // @param entryPoint : Entry-point of mesh shader // @param ldsLayout : Mesh shader LDS layout (could be null) +// @param outputsLayout : Mesh shader outputs layout (could be null) unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Function *entryPoint, - MeshLdsLayout *ldsLayout) { + MeshLdsLayout *ldsLayout, MeshOutputsLayout *outputsLayout) { if (!pipelineState->hasShaderStage(ShaderStage::Mesh)) return 0; // Mesh shader absent (standalone compiler tries to compile a single task shader) - assert(getShaderStage(entryPoint) == ShaderStage::Mesh); // Must be mesh shader - - auto gfxIp = pipelineState->getTargetInfo().getGfxIpVersion(); - assert(gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - (void(gfxIp)); // Unused + assert(getShaderStage(entryPoint) == ShaderStage::Mesh); // Must be mesh shader + assert(pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3})); // Must be GFX10.3+ // // The LDS layout of mesh shader is something as follow (consists of two main parts): @@ -101,8 +98,8 @@ unsigned 
MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct // auto meshMode = pipelineState->getShaderModes()->getMeshShaderMode(); - assert(meshMode.outputVertices <= Gfx9::NggMaxThreadsPerSubgroup); - assert(meshMode.outputPrimitives <= Gfx9::NggMaxThreadsPerSubgroup); + assert(meshMode.outputVertices <= NggMaxThreadsPerSubgroup); + assert(meshMode.outputPrimitives <= NggMaxThreadsPerSubgroup); const auto resUsage = pipelineState->getShaderResourceUsage(ShaderStage::Mesh); @@ -117,6 +114,21 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct LLPC_OUTS("\n"); }; + auto printOutputLayoutInfo = [=](unsigned location, unsigned numComponents, unsigned relativeOffset, + BuiltInKind forBuiltIn) { + if (numComponents > 4) { + LLPC_OUTS(format("-- location = %u-%u, components = %u, offset = %u", location, location + 1, numComponents, + relativeOffset)); + } else { + LLPC_OUTS(format("-- location = %u, components = %u, offset = %u", location, numComponents, relativeOffset)); + } + + if (forBuiltIn != InvalidValue) + LLPC_OUTS(" (builtin = " << PipelineState::getBuiltInName(forBuiltIn) << ")"); + + LLPC_OUTS("\n"); + }; + if (ldsLayout) { LLPC_OUTS("===============================================================================\n"); LLPC_OUTS("// LLPC mesh shader LDS region info (in dwords) and general info\n\n"); @@ -170,21 +182,86 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct meshLdsSizeInDwords += ldsRegionSize; // Per-vertex outputs - const unsigned vertexStride = 4 * resUsage->inOutUsage.outputMapLocCount; // Corresponds to vec4 output + auto &vertexOutputComponents = resUsage->inOutUsage.mesh.vertexOutputComponents; + unsigned vertexStride = 0; + for (auto &vertexOutput : vertexOutputComponents) { + const auto numComponents = vertexOutput.second.first; + vertexStride += numComponents; // Calculate total number of components of vertex outputs + } + ldsRegionSize = vertexStride * 
meshMode.outputVertices; if (ldsLayout) { printLdsRegionInfo("Per-vertex Output", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::VertexOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); + + assert(outputsLayout); + outputsLayout->vertexStride = vertexStride; + + unsigned offsetInVertex = 0; + unsigned vertexExportCount = 0; + + for (auto &vertexOutput : vertexOutputComponents) { + const auto location = vertexOutput.first; + const auto &[numComponents, forBuiltIn] = vertexOutput.second; + + outputsLayout->offsetsInVertex[location] = offsetInVertex; // Map output locations to relative offsets in vertex + offsetInVertex += numComponents; + + // Skip those special outputs mapped from vertex built-ins, don't count them in at present + if (forBuiltIn == InvalidValue) + vertexExportCount += (numComponents > 4 ? 2 : 1); + } + + // Consider those special outputs mapped from vertex built-ins + for (auto &vertexExport : resUsage->inOutUsage.mesh.vertexBuiltInExportSlots) { + const unsigned exportSlot = vertexExport.second; + vertexExportCount = std::max(vertexExportCount, exportSlot + 1); + } + outputsLayout->vertexExportCount = vertexExportCount; + ldsOffsetInDwords += ldsRegionSize; } meshLdsSizeInDwords += ldsRegionSize; // Per-primitive outputs - const unsigned primitiveStride = 4 * resUsage->inOutUsage.perPrimitiveOutputMapLocCount; // Corresponds to vec4 output + auto &primitiveOutputComponents = resUsage->inOutUsage.mesh.primitiveOutputComponents; + unsigned primitiveStride = 0; + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto numComponents = primitiveOutput.second.first; + primitiveStride += numComponents; // Calculate total number of components of primitive outputs + } + ldsRegionSize = primitiveStride * meshMode.outputPrimitives; if (ldsLayout) { printLdsRegionInfo("Per-primitive Output", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::PrimitiveOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); 
+ + assert(outputsLayout); + outputsLayout->primitiveStride = primitiveStride; + + unsigned offsetInPrimitive = 0; + unsigned primitiveExportCount = 0; + + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto location = primitiveOutput.first; + const auto &[numComponents, forBuiltIn] = primitiveOutput.second; + + outputsLayout->offsetsInPrimitive[location] = + offsetInPrimitive; // Map output locations to relative offsets in primitive + offsetInPrimitive += numComponents; + + // Skip those special outputs mapped from primitive built-ins, don't count them in at present + if (forBuiltIn == InvalidValue) + primitiveExportCount += (numComponents > 4 ? 2 : 1); + } + + // Consider those special outputs mapped from primitive built-ins + for (auto &primitiveExport : resUsage->inOutUsage.mesh.primitiveBuiltInExportSlots) { + const unsigned exportSlot = primitiveExport.second; + primitiveExportCount = std::max(primitiveExportCount, exportSlot + 1); + } + outputsLayout->primitiveExportCount = primitiveExportCount; + ldsOffsetInDwords += ldsRegionSize; } meshLdsSizeInDwords += ldsRegionSize; @@ -235,6 +312,27 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct printLdsRegionInfo("Internal Mesh LDS", 0, meshLdsSizeInDwords); printLdsRegionInfo("Shared Variable LDS", 0, sharedVarLdsSizeInDwords); printLdsRegionInfo("Total LDS", 0, meshLdsSizeInDwords + sharedVarLdsSizeInDwords); + + if (!outputsLayout->offsetsInVertex.empty()) { + LLPC_OUTS("\nVertex Outputs Layout (stride = " << outputsLayout->vertexStride + << ", exports = " << outputsLayout->vertexExportCount << "):\n"); + for (auto &vertexOutput : outputsLayout->offsetsInVertex) { + const auto &[location, offsetInVertex] = vertexOutput; + const auto &[numComponents, forBuiltIn] = vertexOutputComponents[location]; + printOutputLayoutInfo(location, numComponents, offsetInVertex, forBuiltIn); + } + } + + if (!outputsLayout->offsetsInPrimitive.empty()) { + LLPC_OUTS("\nPrimitive 
outputs layout (stride = " << outputsLayout->primitiveStride << ", exports = " + << outputsLayout->primitiveExportCount << "):\n"); + for (auto &primitiveOutput : outputsLayout->offsetsInPrimitive) { + const auto &[location, offsetInPrimitive] = primitiveOutput; + const auto &[numComponents, forBuiltIn] = primitiveOutputComponents[location]; + printOutputLayoutInfo(location, numComponents, offsetInPrimitive, forBuiltIn); + } + } + LLPC_OUTS("\n"); LLPC_OUTS("Workgroup Size (X, Y, Z) = (" << meshMode.workgroupSizeX << ", " << meshMode.workgroupSizeY << ", " << meshMode.workgroupSizeZ << ")\n"); @@ -259,7 +357,7 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct LLPC_OUTS("Max Vertices = " << meshMode.outputVertices << ", Max Primitives = " << meshMode.outputPrimitives << "\n"); if (!meshSharedVars.empty()) { - LLPC_OUTS("Shared variables:\n"); + LLPC_OUTS("Shared Variables:\n"); for (auto meshSharedVar : meshSharedVars) { assert(meshSharedVar->getAlignment() == 4); // Must be 1 dword const auto sizeInBytes = @@ -267,8 +365,8 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct assert(sizeInBytes % 4 == 0); // Must be multiple of 4 const auto sizeInDwords = sizeInBytes / 4; - LLPC_OUTS("Name = " << meshSharedVar->getName() << ", Type = " << getTypeName(meshSharedVar->getValueType()) - << ", Size (in dwords) = " << sizeInDwords << "\n"); + LLPC_OUTS("-- name = " << meshSharedVar->getName() << ", type = " << getTypeName(meshSharedVar->getValueType()) + << ", size (in dwords) = " << sizeInDwords << "\n"); } } LLPC_OUTS("\n"); @@ -404,7 +502,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // - SetMeshPrimitiveCulled -> Write null primitive flag to LDS // - GetMeshBuiltinInput -> Lower mesh built-in input // - TaskPayloadPtr -> Transform task payload descriptor - // - WriteMeshVertexOutput/WriteMeshPrimitiveOutput -> Write output data to LDS + // - WriteMeshOutput -> Write output data to 
LDS // } // // Barrier (if needBarrierFlag) @@ -464,7 +562,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Mesh); // Setup LDS layout - layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout); + layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout, &m_outputsLayout); m_lds = getOrCreateMeshLds(entryPoint->getParent()); // Mutate mesh shader entry-point @@ -1342,55 +1440,60 @@ void MeshTaskShader::lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMe } // ===================================================================================================================== -// Lower write mesh vertex output. Write mesh shader vertex outputs to LDS. -// -// @param writeMeshVertexOutputOp : Call instruction op to write vertex output for mesh shader -void MeshTaskShader::lowerWriteMeshVertexOutput(WriteMeshVertexOutputOp &writeMeshVertexOutputOp) { - m_builder.SetInsertPoint(&writeMeshVertexOutputOp); - - assert(getShaderStage(writeMeshVertexOutputOp.getFunction()) == ShaderStage::Mesh); - - auto outputOffset = writeMeshVertexOutputOp.getOutputOffset(); - auto vertexIndex = writeMeshVertexOutputOp.getVertexIndex(); - auto outputValue = writeMeshVertexOutputOp.getOutputValue(); - - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh); - const unsigned vertexStride = 4 * resUsage->inOutUsage.outputMapLocCount; // Corresponds to vec4 output - - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexOutput)); - Value *ldsOffset = m_builder.CreateMul(vertexIndex, m_builder.getInt32(vertexStride)); - ldsOffset = m_builder.CreateAdd(ldsOffset, outputOffset); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); - - writeValueToLds(outputValue, ldsOffset); - - m_callsToRemove.push_back(&writeMeshVertexOutputOp); -} - -// 
===================================================================================================================== -// Lower write mesh primitive output. Write mesh shader primitive outputs to LDS. +// Lower write mesh vertex/primitive output. Write mesh shader vertex/primitive outputs to LDS. // -// @param writeMeshPrimitiveOutputOp : Call instruction op to write primitive output for mesh shader -void MeshTaskShader::lowerWriteMeshPrimitiveOutput(WriteMeshPrimitiveOutputOp &writeMeshPrimitiveOutputOp) { - m_builder.SetInsertPoint(&writeMeshPrimitiveOutputOp); - - assert(getShaderStage(writeMeshPrimitiveOutputOp.getFunction()) == ShaderStage::Mesh); +// @param WriteMeshOutputOp : Call instruction op to write vertex/primitive output for mesh shader +void MeshTaskShader::lowerWriteMeshOutput(WriteMeshOutputOp &writeMeshOutputOp) { + m_builder.SetInsertPoint(&writeMeshOutputOp); + + assert(getShaderStage(writeMeshOutputOp.getFunction()) == ShaderStage::Mesh); + + auto isPrimitive = writeMeshOutputOp.getIsPrimitive(); + auto location = writeMeshOutputOp.getLocation(); + auto locationOffset = writeMeshOutputOp.getLocationOffset(); + auto componentIndex = writeMeshOutputOp.getComponentIndex(); + auto primOrVertexIndex = writeMeshOutputOp.getPrimOrVertexIndex(); + auto outputValue = writeMeshOutputOp.getOutputValue(); + + auto &outputComponents = + isPrimitive + ? m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents + : m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + + // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + + // offsetInPrimOrVertex + locationIndex * numComponents + componentIndex + Value *ldsStart = m_builder.getInt32( + getMeshShaderLdsRegionStart(isPrimitive ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + const unsigned primOrVertexStride = isPrimitive ? 
m_outputsLayout.primitiveStride : m_outputsLayout.vertexStride; + Value *primOrVertexOffset = m_builder.CreateMul(primOrVertexIndex, m_builder.getInt32(primOrVertexStride)); + + Value *offsetInPrimOrVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, isPrimitive)); + if (locationOffset != m_builder.getInt32(0)) { + auto locationIndex = locationOffset; + + assert(outputComponents.count(location) > 0); // Must exist + unsigned numComponents = outputComponents[location].first; + + if (numComponents > 4) { + // NOTE: Here we encounter 64-bit vec3/vec4 data types. Such types will occupy two consecutive locations and the + // provided location offset must be divided by 2 to get real location index. + locationIndex = m_builder.CreateLShr(locationOffset, 2); + } - auto outputOffset = writeMeshPrimitiveOutputOp.getOutputOffset(); - auto primitiveIndex = writeMeshPrimitiveOutputOp.getPrimitiveIndex(); - auto outputValue = writeMeshPrimitiveOutputOp.getOutputValue(); + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, + m_builder.CreateMul(locationIndex, m_builder.getInt32(numComponents))); + } - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh); - const unsigned primitiveStride = 4 * resUsage->inOutUsage.perPrimitiveOutputMapLocCount; // Corresponds to vec4 output + if (componentIndex != m_builder.getInt32(0)) + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, componentIndex); - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveOutput)); - Value *ldsOffset = m_builder.CreateMul(primitiveIndex, m_builder.getInt32(primitiveStride)); - ldsOffset = m_builder.CreateAdd(ldsOffset, outputOffset); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); writeValueToLds(outputValue, ldsOffset); - 
m_callsToRemove.push_back(&writeMeshPrimitiveOutputOp); + m_callsToRemove.push_back(&writeMeshOutputOp); } // ===================================================================================================================== @@ -1703,8 +1806,7 @@ void MeshTaskShader::lowerMeshShaderBody(BasicBlock *apiMeshEntryBlock, BasicBlo .add(&MeshTaskShader::lowerSetMeshPrimitiveIndices) .add(&MeshTaskShader::lowerSetMeshPrimitiveCulled) .add(&MeshTaskShader::lowerGetMeshBuiltinInput) - .add(&MeshTaskShader::lowerWriteMeshVertexOutput) - .add(&MeshTaskShader::lowerWriteMeshPrimitiveOutput) + .add(&MeshTaskShader::lowerWriteMeshOutput) .build(); visitor.visit(*this, *entryPoint); @@ -1835,36 +1937,60 @@ void MeshTaskShader::exportPrimitive() { // Primitive attribute export follows vertex attribute export SmallVector primAttrExports; - unsigned startLoc = inOutUsage.mesh.genericOutputMapLocCount; - for (auto &builtInExport : inOutUsage.mesh.builtInExportLocs) { - const unsigned exportLoc = builtInExport.second; - startLoc = std::max(startLoc, exportLoc + 1); - } + unsigned startSlot = m_outputsLayout.vertexExportCount; // Export primitive attributes (from generic outputs) ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveOutput)); - auto primitiveStride = 4 * inOutUsage.perPrimitiveOutputMapLocCount; - auto ldsOffsetBase = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(primitiveStride)); - ldsOffsetBase = m_builder.CreateAdd(ldsStart, ldsOffsetBase); + auto primitiveOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); + + auto &primitiveOutputComponents = + m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents; + unsigned exportSlot = startSlot; + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto location = primitiveOutput.first; + const auto &[numComponents, forBuiltIn] = 
primitiveOutput.second; + assert(numComponents > 0); + + if (forBuiltIn != InvalidValue) + continue; // Skip those special outputs mapped from primitive built-ins. They will be handled later on. - for (unsigned loc = 0; loc < inOutUsage.mesh.perPrimitiveGenericOutputMapLocCount; ++loc) { - auto ldsOffset = m_builder.CreateAdd(ldsOffsetBase, m_builder.getInt32(4 * loc)); - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), ldsOffset); + auto offsetInPrimitive = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, true)); - std::array exportValues; - for (unsigned j = 0; j < 4; ++j) - exportValues[j] = m_builder.CreateExtractElement(exportValue, j); + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primitiveOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimitive); - primAttrExports.push_back({startLoc + loc, exportValues}); + auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); + + SmallVector exporteValues; + for (unsigned i = 0; i < numComponents; ++i) + exporteValues.push_back(m_builder.CreateExtractElement(exportValue, i)); + + // Do array padding + if (numComponents <= 4) { + while (exporteValues.size() < 4) // <4 x float> + exporteValues.push_back(nullptr); + } else { + while (exporteValues.size() < 8) // <8 x float> + exporteValues.push_back(nullptr); + } + + primAttrExports.push_back({exportSlot++, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); ++inOutUsage.primExpCount; + + if (numComponents > 4) { + primAttrExports.push_back({exportSlot++, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); + ++inOutUsage.primExpCount; + } } // Export primitive attributes (from built-ins as generic ones) if (builtInUsage.primitiveId) { - if (inOutUsage.mesh.perPrimitiveBuiltInExportLocs.count(BuiltInPrimitiveId) > 0) { + if 
(inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInPrimitiveId) > 0) { assert(primitiveId); - const unsigned exportLoc = inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInPrimitiveId]; - primAttrExports.push_back({startLoc + exportLoc, primitiveId}); + const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId]; + primAttrExports.push_back({startSlot + exportSlot, primitiveId}); ++inOutUsage.primExpCount; } } @@ -1886,10 +2012,10 @@ void MeshTaskShader::exportPrimitive() { } if (exportLayer) { - if (inOutUsage.mesh.perPrimitiveBuiltInExportLocs.count(BuiltInLayer) > 0) { + if (inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInLayer) > 0) { assert(fsLayer); - const unsigned exportLoc = inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInLayer]; - primAttrExports.push_back({startLoc + exportLoc, fsLayer}); + const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer]; + primAttrExports.push_back({startSlot + exportSlot, fsLayer}); ++inOutUsage.primExpCount; } } @@ -1911,10 +2037,10 @@ void MeshTaskShader::exportPrimitive() { } if (exportViewportIndex) { - if (inOutUsage.mesh.perPrimitiveBuiltInExportLocs.count(BuiltInViewportIndex) > 0) { + if (inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInViewportIndex) > 0) { assert(fsViewportIndex); - const unsigned exportLoc = inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInViewportIndex]; - primAttrExports.push_back({startLoc + exportLoc, fsViewportIndex}); + const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex]; + primAttrExports.push_back({startSlot + exportSlot, fsViewportIndex}); ++inOutUsage.primExpCount; } } @@ -2002,21 +2128,48 @@ void MeshTaskShader::exportVertex() { // Export vertex attributes (from generic outputs) Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexOutput)); - auto vertexStride = 4 * inOutUsage.outputMapLocCount; - auto ldsOffsetBase = 
m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(vertexStride)); - ldsOffsetBase = m_builder.CreateAdd(ldsStart, ldsOffsetBase); + auto vertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); + + auto &vertexOutputComponents = + m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + unsigned exportSlot = 0; + for (auto &vertexOutput : vertexOutputComponents) { + const auto location = vertexOutput.first; + const auto &[numComponents, forBuiltIn] = vertexOutput.second; + assert(numComponents > 0); - for (unsigned i = 0; i < inOutUsage.mesh.genericOutputMapLocCount; ++i) { - auto ldsOffset = m_builder.CreateAdd(ldsOffsetBase, m_builder.getInt32(4 * i)); - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), ldsOffset); + if (forBuiltIn != InvalidValue) + continue; // Skip those special outputs mapped from vertex built-ins. They will be handled later on. 
- std::array exportValues = {m_builder.CreateExtractElement(exportValue, static_cast(0)), - m_builder.CreateExtractElement(exportValue, 1), - m_builder.CreateExtractElement(exportValue, 2), - m_builder.CreateExtractElement(exportValue, 3)}; + auto offsetInVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, false)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, vertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInVertex); + + auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); + + SmallVector exporteValues; + for (unsigned i = 0; i < numComponents; ++i) + exporteValues.push_back(m_builder.CreateExtractElement(exportValue, i)); + + // Do array padding + if (numComponents <= 4) { + while (exporteValues.size() < 4) // <4 x float> + exporteValues.push_back(nullptr); + } else { + while (exporteValues.size() < 8) // <8 x float> + exporteValues.push_back(nullptr); + } - vertAttrExports.push_back({i, exportValues}); + vertAttrExports.push_back({exportSlot++, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); ++inOutUsage.expCount; + + if (numComponents > 4) { + vertAttrExports.push_back({exportSlot++, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); + ++inOutUsage.expCount; + } } // Export vertex attributes (from built-ins as generic ones) @@ -2057,23 +2210,23 @@ void MeshTaskShader::exportVertex() { } if (exportClipCullDistance) { - unsigned exportLoc = InvalidValue; - if (inOutUsage.mesh.builtInExportLocs.count(BuiltInClipDistance) > 0) { - exportLoc = inOutUsage.mesh.builtInExportLocs[BuiltInClipDistance]; + unsigned exportSlot = InvalidValue; + if (inOutUsage.mesh.vertexBuiltInExportSlots.count(BuiltInClipDistance) > 0) { + exportSlot = inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance]; } else { - assert(inOutUsage.mesh.builtInExportLocs.count(BuiltInCullDistance) > 0); - exportLoc = 
inOutUsage.mesh.builtInExportLocs[BuiltInCullDistance]; + assert(inOutUsage.mesh.vertexBuiltInExportSlots.count(BuiltInCullDistance) > 0); + exportSlot = inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance]; } - assert(exportLoc != InvalidValue); + assert(exportSlot != InvalidValue); vertAttrExports.push_back( - {exportLoc, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); + {exportSlot, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); ++inOutUsage.expCount; if (clipCullDistances.size() > 4) { // Do the second exporting vertAttrExports.push_back( - {exportLoc + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); + {exportSlot + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); ++inOutUsage.expCount; } } @@ -2198,14 +2351,14 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { if (kind == ExportKind::Pos || kind == ExportKind::Prim) { m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp_row, valueTy, { - m_builder.getInt32(target + exports[i].index), // tgt - m_builder.getInt32(validMask), // en - values[0], // src0 - values[1] ? values[1] : poison, // src1 - values[2] ? values[2] : poison, // src2 - values[3] ? values[3] : poison, // src3 - m_builder.getInt1(exportDone), // done - m_waveThreadInfo.rowInSubgroup, // row number + m_builder.getInt32(target + exports[i].slot), // tgt + m_builder.getInt32(validMask), // en + values[0], // src0 + values[1] ? values[1] : poison, // src1 + values[2] ? values[2] : poison, // src2 + values[3] ? 
values[3] : poison, // src3 + m_builder.getInt1(exportDone), // done + m_waveThreadInfo.rowInSubgroup, // row number }); } else { assert(kind == ExportKind::VertAttr || kind == ExportKind::PrimAttr); @@ -2216,17 +2369,17 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { valueToStore = m_builder.CreateInsertElement(valueToStore, values[j], j); } - // ringOffset = attribRingBaseOffset + 32 * exportIndex * 16 - // = attribRingBaseOffset + exportIndex * 512 - unsigned exportIndex = exports[i].index; + // ringOffset = attribRingBaseOffset + 32 * exportSlot * 16 + // = attribRingBaseOffset + exportSlot * 512 + unsigned exportSlot = exports[i].slot; if (kind == ExportKind::PrimAttr && m_hasNoVertexAttrib) { // NOTE: HW allocates and manages attribute ring based on the register fields: VS_EXPORT_COUNT and // PRIM_EXPORT_COUNT. When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even // though this is not what we want. Hence, we should reserve param0 as a dummy vertex attribute and all // primitive attributes are moved after it. - ++exportIndex; + ++exportSlot; } - auto locationOffset = m_builder.getInt32(exportIndex * SizeOfVec4); + auto locationOffset = m_builder.getInt32(exportSlot * SizeOfVec4); CoherentFlag coherent = {}; if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { @@ -2240,14 +2393,14 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { } else { m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp, valueTy, { - m_builder.getInt32(target + exports[i].index), // tgt - m_builder.getInt32(validMask), // en - values[0], // src0 - values[1] ? values[1] : poison, // src1 - values[2] ? values[2] : poison, // src2 - values[3] ? values[3] : poison, // src3 - m_builder.getInt1(exportDone), // done - m_builder.getFalse(), // vm + m_builder.getInt32(target + exports[i].slot), // tgt + m_builder.getInt32(validMask), // en + values[0], // src0 + values[1] ? values[1] : poison, // src1 + values[2] ? 
values[2] : poison, // src2 + values[3] ? values[3] : poison, // src3 + m_builder.getInt1(exportDone), // done + m_builder.getFalse(), // vm }); } } @@ -2259,32 +2412,14 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { void MeshTaskShader::prepareAttribRingAccess() { assert(m_gfxIp.major >= 11); // Must be GFX11+ - // The allocated numbers of vertex/primitive attributes are something as follow: - // 1. Generic vertex attributes - // 2. Vertex attributes mapped from vertex builtins - // 3. Generic primitive attributes - // 4. Primitive attributes mapped from primitive builtins - const auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh; - unsigned vertAttribCount = inOutUsage.genericOutputMapLocCount; - for (auto &builtInExport : inOutUsage.builtInExportLocs) { - const unsigned exportLoc = builtInExport.second; - vertAttribCount = std::max(vertAttribCount, exportLoc + 1); - } - - unsigned primAttribCount = inOutUsage.perPrimitiveGenericOutputMapLocCount; - for (auto &perPrimitiveBuiltInExport : inOutUsage.perPrimitiveBuiltInExportLocs) { - const unsigned exportLoc = perPrimitiveBuiltInExport.second; - primAttribCount = std::max(primAttribCount, exportLoc + 1); - } - - unsigned attribCount = vertAttribCount + primAttribCount; + unsigned attribCount = m_outputsLayout.vertexExportCount + m_outputsLayout.primitiveExportCount; if (attribCount == 0) return; // No attribute export // NOTE: HW allocates and manages attribute ring based on the register fields: VS_EXPORT_COUNT and PRIM_EXPORT_COUNT. // When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even though this is not what we // want. Hence, we should reserve param0 as a dummy vertex attribute. 
- if (vertAttribCount == 0) { + if (m_outputsLayout.vertexExportCount == 0) { m_hasNoVertexAttrib = true; ++attribCount; // Count in this dummy vertex attribute } @@ -2568,19 +2703,25 @@ Value *MeshTaskShader::readMeshBuiltInFromLds(BuiltInKind builtIn) { break; } - Value *ldsOffset = nullptr; + // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + offsetInPrimOrVertex + Value *primOrVertexOffset = nullptr; if (region == MeshLdsRegion::VertexOutput) { - auto vertexStride = 4 * inOutUsage.outputMapLocCount; - ldsOffset = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(vertexStride)); + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); } else { assert(region == MeshLdsRegion::PrimitiveOutput); - auto primitiveStride = 4 * inOutUsage.perPrimitiveOutputMapLocCount; - ldsOffset = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(primitiveStride)); + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); } - ldsOffset = m_builder.CreateAdd(ldsOffset, m_builder.getInt32(4 * location)); - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(region)); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); + Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart( + region == MeshLdsRegion::PrimitiveOutput ? 
MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + Value *offsetInPrimOrVertex = + m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, region == MeshLdsRegion::PrimitiveOutput)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); return readValueFromLds(readTy, ldsOffset); } @@ -2791,7 +2932,7 @@ Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset) { assert(m_lds); assert(readTy->isIntOrIntVectorTy() || readTy->isFPOrFPVectorTy()); - Value *readPtr = m_builder.CreateGEP(m_lds->getValueType(), m_lds, {m_builder.getInt32(0), ldsOffset}); + Value *readPtr = m_builder.CreateGEP(m_builder.getInt32Ty(), m_lds, ldsOffset); const unsigned bitWidth = readTy->getScalarSizeInBits(); if (bitWidth == 8 || bitWidth == 16) { @@ -2833,7 +2974,7 @@ void MeshTaskShader::writeValueToLds(Value *writeValue, Value *ldsOffset) { auto writeTy = writeValue->getType(); assert(writeTy->isIntOrIntVectorTy() || writeTy->isFPOrFPVectorTy()); - Value *writePtr = m_builder.CreateGEP(m_lds->getValueType(), m_lds, {m_builder.getInt32(0), ldsOffset}); + Value *writePtr = m_builder.CreateGEP(m_builder.getInt32Ty(), m_lds, ldsOffset); const unsigned bitWidth = writeTy->getScalarSizeInBits(); if (bitWidth == 8 || bitWidth == 16) { @@ -2875,7 +3016,7 @@ void MeshTaskShader::atomicOpWithLds(AtomicRMWInst::BinOp atomicOp, Value *atomi assert(atomicValue->getType()->isIntegerTy(32)); // NOTE: Here, we just use LDS atomics to do ALU operations on LDS. No synchronization between threads is needed. 
- Value *atomicPtr = m_builder.CreateGEP(m_lds->getValueType(), m_lds, {m_builder.getInt32(0), ldsOffset}); + Value *atomicPtr = m_builder.CreateGEP(m_builder.getInt32Ty(), m_lds, ldsOffset); m_builder.CreateAtomicRMW(atomicOp, atomicPtr, atomicValue, MaybeAlign(), AtomicOrdering::Monotonic, SyncScope::SingleThread); } diff --git a/lgc/patch/MeshTaskShader.h b/lgc/patch/MeshTaskShader.h index da8fa30c83..e224fd7100 100644 --- a/lgc/patch/MeshTaskShader.h +++ b/lgc/patch/MeshTaskShader.h @@ -61,6 +61,17 @@ enum class MeshLdsRegion : unsigned { // Map: LDS Region -> typedef std::unordered_map> MeshLdsLayout; +// Mesh shader outputs layout +struct MeshOutputsLayout { + unsigned vertexStride; // Vertex stride (in dwords) + unsigned vertexExportCount; // Vertex export count + unsigned primitiveStride; // Primitive stride (in dwords) + unsigned primitiveExportCount; // Primitive export count + std::map offsetsInVertex; // Map from output location to output offset within a vertex (in dwords) + std::map + offsetsInPrimitive; // Map from output location to output offset within a primitive (in dwords) +}; + // ===================================================================================================================== // Represents the handler of mesh/task shader. 
class MeshTaskShader { @@ -69,7 +80,7 @@ class MeshTaskShader { ~MeshTaskShader(); static unsigned layoutMeshShaderLds(PipelineState *pipelineState, llvm::Function *entryPoint, - MeshLdsLayout *ldsLayout = nullptr); + MeshLdsLayout *ldsLayout = nullptr, MeshOutputsLayout *outputsLayout = nullptr); void process(llvm::Function *taskEntryPoint, llvm::Function *meshEntryPoint); @@ -86,8 +97,7 @@ class MeshTaskShader { void lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &setMeshPrimitiveIndicesOp); void lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMeshPrimitiveCulledOp); void lowerGetMeshBuiltinInput(GetMeshBuiltinInputOp &getMeshBuiltinInputOp); - void lowerWriteMeshVertexOutput(WriteMeshVertexOutputOp &writeMeshVertexOutputOp); - void lowerWriteMeshPrimitiveOutput(WriteMeshPrimitiveOutputOp &writeMeshPrimitiveOutputOp); + void lowerWriteMeshOutput(WriteMeshOutputOp &writeMeshOutputOp); void initWaveThreadInfo(llvm::Function *entryPoint); llvm::Value *getShaderRingEntryIndex(llvm::Function *entryPoint); @@ -114,7 +124,7 @@ class MeshTaskShader { }; // Export info of a single entry struct ExportInfo { - unsigned index; + unsigned slot; std::array values; }; void doExport(ExportKind kind, llvm::ArrayRef exports); @@ -138,6 +148,16 @@ class MeshTaskShader { return m_ldsLayout[region].first; } + unsigned getOutputOffsetInPrimOrVertex(unsigned location, bool inPrimitive) { + if (inPrimitive) { + assert(m_outputsLayout.offsetsInPrimitive.count(location) > 0); // Must exist + return m_outputsLayout.offsetsInPrimitive[location]; + } + + assert(m_outputsLayout.offsetsInVertex.count(location) > 0); // Must exist + return m_outputsLayout.offsetsInVertex[location]; + } + llvm::Value *readValueFromLds(llvm::Type *readTy, llvm::Value *ldsOffset); void writeValueToLds(llvm::Value *writeValue, llvm::Value *ldsOffset); void atomicOpWithLds(llvm::AtomicRMWInst::BinOp atomicOp, llvm::Value *atomicValue, llvm::Value *ldsOffset); @@ -185,7 +205,8 @@ class MeshTaskShader 
{ GfxIpVersion m_gfxIp; // Graphics IP version info - MeshLdsLayout m_ldsLayout; // Mesh shader LDS layout + MeshLdsLayout m_ldsLayout; // Mesh shader LDS layout + MeshOutputsLayout m_outputsLayout; // Mesh shader outputs layout }; } // namespace lgc diff --git a/lgc/patch/NggPrimShader.cpp b/lgc/patch/NggPrimShader.cpp index ba73df4900..a5a5943ed5 100644 --- a/lgc/patch/NggPrimShader.cpp +++ b/lgc/patch/NggPrimShader.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ #include "NggPrimShader.h" -#include "Gfx9Chip.h" #include "ShaderMerger.h" #include "lgc/patch/Patch.h" #include "lgc/state/PalMetadata.h" @@ -134,15 +133,15 @@ NggPrimShader::NggPrimShader(PipelineState *pipelineState) } buildPrimShaderCbLayoutLookupTable(); - calcVertexCullInfoSizeAndOffsets(m_pipelineState, m_vertCullInfoOffsets); } // ===================================================================================================================== // Calculates the dword size of ES-GS ring item. // // @param pipelineState : Pipeline state +// @param esMain : ES main function // @returns : ES-GS ring item size in dwords -unsigned NggPrimShader::calcEsGsRingItemSize(PipelineState *pipelineState) { +unsigned NggPrimShader::calcEsGsRingItemSize(PipelineState *pipelineState, Function *esMain) { assert(pipelineState->getNggControl()->enableNgg); // Must enable NGG // API GS is present @@ -156,14 +155,8 @@ unsigned NggPrimShader::calcEsGsRingItemSize(PipelineState *pipelineState) { if (pipelineState->getNggControl()->passthroughMode) { unsigned esGsRingItemSize = 1; - if (pipelineState->enableSwXfb()) { - const bool hasTes = pipelineState->hasShaderStage(ShaderStage::TessEval); - auto resUsage = pipelineState->getShaderResourceUsage(hasTes ? ShaderStage::TessEval : ShaderStage::Vertex); - - // NOTE: For GFX11+, transform feedback outputs (each output is <4 x dword>) are stored as a ES-GS ring item. 
- assert(resUsage->inOutUsage.xfbExpCount > 0); - esGsRingItemSize = resUsage->inOutUsage.xfbExpCount * 4; - } + if (pipelineState->enableSwXfb()) + esGsRingItemSize = calcEsXfbOutputsSize(esMain); // NOTE: Make esGsRingItemSize odd by "| 1", to optimize ES -> GS ring layout for LDS bank conflicts. return esGsRingItemSize | 1; @@ -172,7 +165,7 @@ unsigned NggPrimShader::calcEsGsRingItemSize(PipelineState *pipelineState) { // Culling mode is enabled (API GS is not present) VertexCullInfoOffsets vertCullInfoOffsets = {}; // Dummy offsets (don't care) // In the culling mode, the ES-GS ring item is vertex cull info. - unsigned esGsRingItemSize = calcVertexCullInfoSizeAndOffsets(pipelineState, vertCullInfoOffsets); + unsigned esGsRingItemSize = calcVertexCullInfoSizeAndOffsets(pipelineState, esMain, vertCullInfoOffsets); // NOTE: Make esGsRingItemSize odd by "| 1", to optimize ES -> GS ring layout for LDS bank conflicts. return esGsRingItemSize | 1; @@ -233,7 +226,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin } // Primitive data - ldsRegionSize = Gfx9::NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per primitive thread, 4 GS streams + ldsRegionSize = NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per primitive thread, 4 GS streams if (ldsLayout) { printLdsRegionInfo("Primitive Connectivity Data", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::PrimitiveData] = std::make_pair(ldsOffset, ldsRegionSize); @@ -244,7 +237,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin // Primitive counts if (pipelineState->enableSwXfb() || pipelineState->enablePrimStats()) { ldsRegionSize = - (Gfx9::NggMaxWavesPerSubgroup + 1) * MaxGsStreams; // 1 dword per wave and 1 dword per subgroup, 4 GS streams + (NggMaxWavesPerSubgroup + 1) * MaxGsStreams; // 1 dword per wave and 1 dword per subgroup, 4 GS streams if (ldsLayout) { printLdsRegionInfo("Primitive Counts", ldsOffset, ldsRegionSize); 
(*ldsLayout)[PrimShaderLdsRegion::PrimitiveCounts] = std::make_pair(ldsOffset, ldsRegionSize); @@ -255,7 +248,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin // Primitive index map (compacted -> uncompacted) if (pipelineState->enableSwXfb()) { - ldsRegionSize = Gfx9::NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per primitive thread, 4 GS streams + ldsRegionSize = NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per primitive thread, 4 GS streams if (ldsLayout) { printLdsRegionInfo("Primitive Index Map (To Uncompacted)", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::PrimitiveIndexMap] = std::make_pair(ldsOffset, ldsRegionSize); @@ -275,7 +268,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin } } else { ldsRegionSize = - (Gfx9::NggMaxWavesPerSubgroup + 1) * MaxGsStreams; // 1 dword per wave and 1 dword per subgroup, 4 GS streams + (NggMaxWavesPerSubgroup + 1) * MaxGsStreams; // 1 dword per wave and 1 dword per subgroup, 4 GS streams if (ldsLayout) { printLdsRegionInfo("Vertex Counts", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::VertexCounts] = std::make_pair(ldsOffset, ldsRegionSize); @@ -295,7 +288,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin (*ldsLayout)[PrimShaderLdsRegion::VertexIndexMap].second); } } else { - ldsRegionSize = Gfx9::NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per vertex thread, 4 GS streams + ldsRegionSize = NggMaxThreadsPerSubgroup * MaxGsStreams; // 1 dword per vertex thread, 4 GS streams if (ldsLayout) { printLdsRegionInfo("Vertex Index Map (To Uncompacted)", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::VertexIndexMap] = std::make_pair(ldsOffset, ldsRegionSize); @@ -440,7 +433,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin ldsOffset = 0; // DistributedPrimitiveId is always the first region and is overlapped with VertexPosition 
// Vertex position - ldsRegionSize = 4 * Gfx9::NggMaxThreadsPerSubgroup; // 4 dwords per vertex thread + ldsRegionSize = 4 * NggMaxThreadsPerSubgroup; // 4 dwords per vertex thread if (ldsLayout) { printLdsRegionInfo("Vertex Position", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::VertexPosition] = std::make_pair(ldsOffset, ldsRegionSize); @@ -471,7 +464,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin } // Vertex counts - ldsRegionSize = Gfx9::NggMaxWavesPerSubgroup + 1; // 1 dword per wave and 1 dword per subgroup + ldsRegionSize = NggMaxWavesPerSubgroup + 1; // 1 dword per wave and 1 dword per subgroup if (ldsLayout) { printLdsRegionInfo("Vertex Counts", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::VertexCounts] = std::make_pair(ldsOffset, ldsRegionSize); @@ -481,7 +474,7 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin // Vertex index map if (pipelineState->getNggControl()->compactVertex) { - ldsRegionSize = Gfx9::NggMaxThreadsPerSubgroup; // 1 dword per wave and 1 dword per subgroup + ldsRegionSize = NggMaxThreadsPerSubgroup; // 1 dword per wave and 1 dword per subgroup if (ldsLayout) { printLdsRegionInfo("Vertex Index Map (To Uncompacted)", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::VertexIndexMap] = std::make_pair(ldsOffset, ldsRegionSize); @@ -509,8 +502,6 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin // @param gsMain : GS main function (could be null) // @param copyShader : Copy shader main function (could be null) Function *NggPrimShader::generate(Function *esMain, Function *gsMain, Function *copyShader) { - assert(m_gfxIp.major >= 10); - // ES and GS could not be null at the same time assert((!esMain && !gsMain) == false); @@ -598,7 +589,8 @@ Function *NggPrimShader::generate(Function *esMain, Function *gsMain, Function * } // Setup LDS layout - m_lds = Patch::getLdsVariable(m_pipelineState, 
module); + m_lds = Patch::getLdsVariable(m_pipelineState, gsMain ? gsMain : esMain); + calcVertexCullInfoSizeAndOffsets(m_pipelineState, esMain, m_vertCullInfoOffsets); layoutPrimShaderLds(m_pipelineState, &m_ldsLayout); // Build primitive shader body @@ -621,9 +613,10 @@ Function *NggPrimShader::generate(Function *esMain, Function *gsMain, Function * // item of vertex cull info region. // // @param pipelineState : Pipeline state +// @param esMain : ES main function // @param [out] vertCullInfoOffsets : The collection of LDS offsets to build // @returns : Dword size of vertex cull info -unsigned NggPrimShader::calcVertexCullInfoSizeAndOffsets(PipelineState *pipelineState, +unsigned NggPrimShader::calcVertexCullInfoSizeAndOffsets(PipelineState *pipelineState, Function *esMain, VertexCullInfoOffsets &vertCullInfoOffsets) { auto nggControl = pipelineState->getNggControl(); assert(nggControl->enableNgg); @@ -640,12 +633,7 @@ unsigned NggPrimShader::calcVertexCullInfoSizeAndOffsets(PipelineState *pipeline unsigned itemSize = 0; if (pipelineState->enableSwXfb()) { - const bool hasTes = pipelineState->hasShaderStage(ShaderStage::TessEval); - auto resUsage = pipelineState->getShaderResourceUsage(hasTes ? ShaderStage::TessEval : ShaderStage::Vertex); - - // NOTE: Each transform feedback output is <4 x dword>. - const unsigned xfbOutputCount = resUsage->inOutUsage.xfbExpCount; - itemSize = sizeof(VertexCullInfo::xfbOutputs) * xfbOutputCount / sizeof(unsigned); + itemSize = calcEsXfbOutputsSize(esMain); cullInfoSize += itemSize; vertCullInfoOffsets.xfbOutputs = cullInfoOffset; cullInfoOffset += itemSize; @@ -723,6 +711,42 @@ unsigned NggPrimShader::calcVertexCullInfoSizeAndOffsets(PipelineState *pipeline return cullInfoSize; } +// ===================================================================================================================== +// Calculate and return the dword size of total transform feedback outputs to write for the ES stage. 
+// +// NOTE: For non 64-bit output, the value is its element count (8-bit/16-bit scalars are padded to 32-bit); for 64-bit +// output, the value is doubled since each 64-bit scalar is split to two dwords to write. This info is used by ES (VS +// or TES in non-GS pipeline) to write the outputs to NGG LDS space on GFX11+ to do SW emulated stream-out. +// +// @param esMain : ES main function +// @returns : Dword size of total transform feedback outputs to write +unsigned NggPrimShader::calcEsXfbOutputsSize(Function *esMain) { + unsigned xfbOutputsSize = 0; + + for (auto &func : esMain->getParent()->functions()) { + if (!func.getName().starts_with(lgcName::OutputExportXfb) && !func.getName().starts_with(lgcName::NggXfbExport)) + continue; + + for (auto user : func.users()) { + CallInst *const call = dyn_cast(user); + assert(call); + + if (call->getFunction() != esMain) + continue; + + auto xfbOutput = call->getArgOperand(call->arg_size() - 1); + + Type *xfbOutputTy = xfbOutput->getType(); + unsigned xfbOutputSize = xfbOutputTy->isVectorTy() ? cast(xfbOutputTy)->getNumElements() : 1; + if (xfbOutputTy->getScalarSizeInBits() == 64) + xfbOutputSize *= 2; // Double it + xfbOutputsSize += xfbOutputSize; + } + } + + return xfbOutputsSize; +} + // ===================================================================================================================== // Get primitive shader entry-point type. 
// @@ -1077,7 +1101,7 @@ void NggPrimShader::buildPrimShader(Function *primShader) { const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); assert(waveSize == 32 || waveSize == 64); - const unsigned waveCountInSubgroup = Gfx9::NggMaxThreadsPerSubgroup / waveSize; + const unsigned waveCountInSubgroup = NggMaxThreadsPerSubgroup / waveSize; SmallVector args; for (auto &arg : primShader->args()) @@ -1839,7 +1863,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { if (!m_nggControl->compactVertex) assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - const unsigned waveCountInSubgroup = Gfx9::NggMaxThreadsPerSubgroup / waveSize; + const unsigned waveCountInSubgroup = NggMaxThreadsPerSubgroup / waveSize; const bool cullingMode = !m_nggControl->passthroughMode; const auto rasterStream = m_pipelineState->getRasterizerState().rasterStream; @@ -2017,7 +2041,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { // Initialize primitive connectivity data if the stream is active writePerThreadDataToLds(m_builder.getInt32(NullPrim), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * i); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * i); } } @@ -2063,7 +2087,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { m_builder.SetInsertPoint(initVertexCountsBlock); writePerThreadDataToLds(m_builder.getInt32(0), m_nggInputs.threadIdInSubgroup, PrimShaderLdsRegion::VertexCounts, - (Gfx9::NggMaxWavesPerSubgroup + 1) * rasterStream); + (NggMaxWavesPerSubgroup + 1) * rasterStream); m_builder.CreateBr(endInitVertexCountsBlock); } @@ -2076,9 +2100,8 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { createFenceAndBarrier(); if (cullingMode) { - primData = - readPerThreadDataFromLds(m_builder.getInt32Ty(), 
m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + primData = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); auto tryCullPrimitive = m_builder.CreateICmpNE(primData, m_builder.getInt32(NullPrim)); auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.primCountInSubgroup); tryCullPrimitive = m_builder.CreateAnd(tryCullPrimitive, validPrimitive); @@ -2130,7 +2153,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { m_builder.SetInsertPoint(nullifyPrimitiveDataBlock); writePerThreadDataToLds(m_builder.getInt32(NullPrim), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); m_builder.CreateBr(endCullPrimitiveBlock); } @@ -2169,7 +2192,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { // drawFlag = primData[N] != NullPrim auto primData0 = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); auto drawFlag0 = m_builder.CreateICmpNE(primData0, m_builder.getInt32(NullPrim)); drawFlag = drawFlag0; @@ -2177,7 +2200,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { // drawFlag |= N >= 1 ? 
(primData[N-1] != NullPrim) : false auto primData1 = readPerThreadDataFromLds( m_builder.getInt32Ty(), m_builder.CreateSub(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(1)), - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); auto drawFlag1 = m_builder.CreateSelect(m_builder.CreateICmpUGE(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(1)), m_builder.CreateICmpNE(primData1, m_builder.getInt32(NullPrim)), m_builder.getFalse()); @@ -2188,7 +2211,7 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { // drawFlag |= N >= 2 ? (primData[N-2] != NullPrim) : false auto primData2 = readPerThreadDataFromLds( m_builder.getInt32Ty(), m_builder.CreateSub(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(2)), - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); auto drawFlag2 = m_builder.CreateSelect(m_builder.CreateICmpUGE(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(2)), m_builder.CreateICmpNE(primData2, m_builder.getInt32(NullPrim)), m_builder.getFalse()); @@ -2227,8 +2250,8 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { unsigned regionStart = getLdsRegionStart(PrimShaderLdsRegion::VertexCounts); - ldsOffset = m_builder.CreateAdd( - ldsOffset, m_builder.getInt32(regionStart + (Gfx9::NggMaxWavesPerSubgroup + 1) * rasterStream)); + ldsOffset = + m_builder.CreateAdd(ldsOffset, m_builder.getInt32(regionStart + (NggMaxWavesPerSubgroup + 1) * rasterStream)); atomicAdd(vertCountInWave, ldsOffset); m_builder.CreateBr(endAccumVertexCountsBlock); @@ -2242,9 +2265,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { createFenceAndBarrier(); if (m_nggControl->compactVertex) { - auto vertCountInWaves = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInWave, - 
PrimShaderLdsRegion::VertexCounts, - (Gfx9::NggMaxWavesPerSubgroup + 1) * rasterStream); + auto vertCountInWaves = + readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInWave, + PrimShaderLdsRegion::VertexCounts, (NggMaxWavesPerSubgroup + 1) * rasterStream); // The last dword following dwords for all waves (each wave has one dword) stores GS output vertex count of the // entire subgroup @@ -2868,7 +2891,7 @@ void NggPrimShader::exportPrimitiveWithGs(Value *startingVertexIndex) { const auto rasterStream = m_pipelineState->getRasterizerState().rasterStream; Value *primData = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * rasterStream); + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * rasterStream); auto validPrimitive = m_builder.CreateICmpNE(primData, m_builder.getInt32(NullPrim)); // Primitive connectivity data have such layout: @@ -4200,8 +4223,8 @@ Function *NggPrimShader::createGsEmitHandler() { const unsigned regionStart = getLdsRegionStart(PrimShaderLdsRegion::PrimitiveData); // ldsOffset = regionStart + vertexIndex + NggMaxThreadsPerSubgroup * streamId auto ldsOffset = m_builder.CreateAdd(m_builder.getInt32(regionStart), vertexIndex); - ldsOffset = m_builder.CreateAdd(ldsOffset, - m_builder.CreateMul(m_builder.getInt32(Gfx9::NggMaxThreadsPerSubgroup), streamId)); + ldsOffset = + m_builder.CreateAdd(ldsOffset, m_builder.CreateMul(m_builder.getInt32(NggMaxThreadsPerSubgroup), streamId)); writeValueToLds(winding, ldsOffset); m_builder.CreateBr(endEmitPrimBlock); @@ -6125,10 +6148,9 @@ void NggPrimShader::processVertexAttribExport(Function *&target) { coherent.bits.glc = true; coherent.bits.slc = true; } - auto store = m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, attribValue->getType(), - {attribValue, attribRingBufDesc, vertexIndex, locationOffset, ringOffset, - m_builder.getInt32(coherent.u32All)}); - 
(void)store; + m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, attribValue->getType(), + {attribValue, attribRingBufDesc, vertexIndex, locationOffset, ringOffset, + m_builder.getInt32(coherent.u32All)}); removedCalls.push_back(call); } @@ -6276,11 +6298,23 @@ void NggPrimShader::processSwXfb(ArrayRef args) { auto xfbOutputs = fetchXfbOutput(m_esHandlers.main, args, xfbOutputExports); for (unsigned i = 0; i < xfbOutputExports.size(); ++i) { + const auto &xfbOutputExport = xfbOutputExports[i]; assert(xfbOutputs->getType()->isArrayTy()); // Must be arrayed auto outputValue = m_builder.CreateExtractValue(xfbOutputs, i); + // Extract valid elements from returned transform feedback output + assert(outputValue->getType() == FixedVectorType::get(m_builder.getInt32Ty(), 4)); // Must be <4 x i32> + if (xfbOutputExport.numElements == 1) { + outputValue = m_builder.CreateExtractElement(outputValue, static_cast(0)); + } else { + SmallVector shuffleMask; + for (unsigned j = 0; j < xfbOutputExport.numElements; ++j) + shuffleMask.push_back(j); + outputValue = m_builder.CreateShuffleVector(outputValue, outputValue, shuffleMask); + } + // Write transform feedback outputs to LDS region - writeXfbOutputToLds(outputValue, m_nggInputs.threadIdInSubgroup, i); + writeXfbOutputToLds(outputValue, m_nggInputs.threadIdInSubgroup, xfbOutputExport.offsetInVertex); } m_builder.CreateBr(endFetchXfbOutputBlock); @@ -6478,7 +6512,7 @@ void NggPrimShader::processSwXfb(ArrayRef args) { auto outputValue = readXfbOutputFromLds( xfbOutputExport.numElements > 1 ? FixedVectorType::get(m_builder.getFloatTy(), xfbOutputExport.numElements) : m_builder.getFloatTy(), - vertexIndices[i], j); + vertexIndices[i], xfbOutputExport.offsetInVertex); if (xfbOutputExport.is16bit) { // NOTE: For 16-bit transform feedbakc outputs, they are stored as 32-bit without tightly packed in LDS. 
@@ -6571,7 +6605,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); assert(waveSize == 32 || waveSize == 64); - const unsigned waveCountInSubgroup = Gfx9::NggMaxThreadsPerSubgroup / waveSize; + const unsigned waveCountInSubgroup = NggMaxThreadsPerSubgroup / waveSize; const auto &xfbStrides = m_pipelineState->getXfbBufferStrides(); const auto &streamXfbBuffers = m_pipelineState->getStreamXfbBuffers(); @@ -6724,7 +6758,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { writePerThreadDataToLds(m_builder.getInt32(0), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveCounts, (Gfx9::NggMaxWavesPerSubgroup + 1) * i); + PrimShaderLdsRegion::PrimitiveCounts, (NggMaxWavesPerSubgroup + 1) * i); } } @@ -6749,9 +6783,8 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { // drawFlag = primData[N] != NullPrim - auto primData = - readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * i); + auto primData = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * i); drawFlag[i] = m_builder.CreateICmpNE(primData, m_builder.getInt32(NullPrim)); } } @@ -6798,9 +6831,8 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { - atomicAdd( - primCountInWave[i], - m_builder.CreateAdd(ldsOffset, m_builder.getInt32(regionStart + (Gfx9::NggMaxWavesPerSubgroup + 1) * i))); + atomicAdd(primCountInWave[i], + m_builder.CreateAdd(ldsOffset, m_builder.getInt32(regionStart + (NggMaxWavesPerSubgroup + 1) * i))); } 
} @@ -6821,7 +6853,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { auto primCountInWaves = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInWave, - PrimShaderLdsRegion::PrimitiveCounts, (Gfx9::NggMaxWavesPerSubgroup + 1) * i); + PrimShaderLdsRegion::PrimitiveCounts, (NggMaxWavesPerSubgroup + 1) * i); // The last dword following dwords for all waves (each wave has one dword) stores GS output primitive count of // the entire subgroup @@ -6861,7 +6893,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { compactedPrimitiveIndex = m_builder.CreateAdd(primCountInPrevWaves[i], compactedPrimitiveIndex); writePerThreadDataToLds(m_nggInputs.threadIdInSubgroup, compactedPrimitiveIndex, - PrimShaderLdsRegion::PrimitiveIndexMap, Gfx9::NggMaxThreadsPerSubgroup * i); + PrimShaderLdsRegion::PrimitiveIndexMap, NggMaxThreadsPerSubgroup * i); m_builder.CreateBr(endCompactPrimitiveIndexBlock[i]); } @@ -7066,7 +7098,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { Value *uncompactedPrimitiveIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveIndexMap, Gfx9::NggMaxThreadsPerSubgroup * i); + PrimShaderLdsRegion::PrimitiveIndexMap, NggMaxThreadsPerSubgroup * i); Value *vertexIndex = uncompactedPrimitiveIndex; const unsigned outVertsPerPrim = m_pipelineState->getVerticesPerPrimitive(); @@ -7077,9 +7109,8 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { if (outVertsPerPrim > 2) { vertexIndices[2] = m_builder.CreateAdd(vertexIndex, m_builder.getInt32(2)); - Value *primData = - readPerThreadDataFromLds(m_builder.getInt32Ty(), uncompactedPrimitiveIndex, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * i); + Value *primData = readPerThreadDataFromLds(m_builder.getInt32Ty(), uncompactedPrimitiveIndex, + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * i); // NOTE: primData[N] corresponds to the forming vertex // The vertice 
indices in the first triangle // If provoking vertex is the first one, the vertice indices in the second triangle is , otherwise @@ -7225,11 +7256,9 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args SmallVector &xfbOutputExports) { assert(m_pipelineState->enableSwXfb()); - const unsigned xfbOutputCount = - m_pipelineState - ->getShaderResourceUsage(m_hasGs ? ShaderStage::Geometry - : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)) - ->inOutUsage.xfbExpCount; + auto resUsage = m_pipelineState->getShaderResourceUsage( + m_hasGs ? ShaderStage::Geometry : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)); + const unsigned xfbOutputCount = resUsage->inOutUsage.xfbExpCount; // Skip following handling if transform feedback output is empty if (xfbOutputCount == 0) @@ -7310,6 +7339,7 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args Value *xfbOutputs = PoisonValue::get(xfbOutputsTy); unsigned outputIndex = 0; + unsigned offsetInVertex = 0; for (auto func : expFuncs) { for (auto user : func->users()) { @@ -7390,10 +7420,22 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args xfbOutputExports[outputIndex].xfbOffset = xfbOffset; xfbOutputExports[outputIndex].numElements = numElements; xfbOutputExports[outputIndex].is16bit = is16bit; - // Those values are just for GS - xfbOutputExports[outputIndex].locInfo.streamId = streamId; - xfbOutputExports[outputIndex].locInfo.location = location; - xfbOutputExports[outputIndex].locInfo.component = component; + + if (m_hasGs) { + // Update fields for GS to use + xfbOutputExports[outputIndex].locInfo.streamId = streamId; + xfbOutputExports[outputIndex].locInfo.location = location; + xfbOutputExports[outputIndex].locInfo.component = component; + } else { + // Update the field for ES to use + xfbOutputExports[outputIndex].offsetInVertex = offsetInVertex; + + unsigned xfbOutputSize = numElements; + // Double the size if 64-bit output + if 
(outputValue->getType()->getScalarSizeInBits() == 64) + xfbOutputSize *= 2; + offsetInVertex += xfbOutputSize; // Increment the offset + } ++outputIndex; } @@ -7403,6 +7445,7 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args } assert(outputIndex == xfbOutputCount); // Visit all transform feedback export calls + m_builder.CreateRet(xfbOutputs); // Remove calls @@ -7640,9 +7683,8 @@ void NggPrimShader::collectPrimitiveStats() { for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { // drawFlag = primData[N] != NullPrim - auto primData = - readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::PrimitiveData, Gfx9::NggMaxThreadsPerSubgroup * i); + auto primData = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::PrimitiveData, NggMaxThreadsPerSubgroup * i); drawFlag[i] = m_builder.CreateICmpNE(primData, m_builder.getInt32(NullPrim)); } } @@ -7747,8 +7789,8 @@ void NggPrimShader::collectPrimitiveStats() { // // @param readDataTy : Data read from LDS // @param vertexIndex: Relative vertex index in NGG subgroup -// @param outputIndex : Index of this transform feedback output -Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, unsigned outputIndex) { +// @param offsetInVertex : Output offset within all transform feedback outputs of a vertex (in dwords) +Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, unsigned offsetInVertex) { assert(m_pipelineState->enableSwXfb()); // SW-emulated stream-out must be enabled assert(!m_hasGs); @@ -7758,16 +7800,14 @@ Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, if (m_nggControl->passthroughMode) { const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::XfbOutput); - Value *ldsOffset = - m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + 4 * outputIndex)); 
// <4 x dword> + Value *ldsOffset = m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + offsetInVertex)); return readValueFromLds(readDataTy, ldsOffset); } // NOTE: For NGG culling mode, transform feedback outputs are part of vertex cull info. const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::VertexCullInfo); - Value *ldsOffset = - m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + - 4 * outputIndex)); // <4 x dword> + Value *ldsOffset = m_builder.CreateAdd( + vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + offsetInVertex)); return readValueFromLds(readDataTy, ldsOffset); } @@ -7776,8 +7816,8 @@ Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, // // @param writeData : Data written to LDS // @param vertexIndex: Relative vertex index in NGG subgroup -// @param outputIndex : Index of this transform feedback output -void NggPrimShader::writeXfbOutputToLds(Value *writeData, Value *vertexIndex, unsigned outputIndex) { +// @param offsetInVertex : Output offset within all transform feedback outputs of a vertex (in dwords) +void NggPrimShader::writeXfbOutputToLds(Value *writeData, Value *vertexIndex, unsigned offsetInVertex) { assert(m_pipelineState->enableSwXfb()); // SW-emulated stream-out must be enabled assert(!m_hasGs); @@ -7787,17 +7827,15 @@ void NggPrimShader::writeXfbOutputToLds(Value *writeData, Value *vertexIndex, un if (m_nggControl->passthroughMode) { const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::XfbOutput); - Value *ldsOffset = - m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + 4 * outputIndex)); // <4 x dword> + Value *ldsOffset = m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + offsetInVertex)); writeValueToLds(writeData, ldsOffset); return; } // NOTE: For NGG culling mode, transform feedback outputs are part of vertex cull info. 
const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::VertexCullInfo); - Value *ldsOffset = - m_builder.CreateAdd(vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + - 4 * outputIndex)); // <4 x dword> + Value *ldsOffset = m_builder.CreateAdd( + vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + offsetInVertex)); writeValueToLds(writeData, ldsOffset); } @@ -8004,7 +8042,7 @@ void NggPrimShader::writeValueToLds(Value *writeValue, Value *ldsOffset, bool us void NggPrimShader::atomicAdd(Value *ValueToAdd, Value *ldsOffset) { assert(ValueToAdd->getType()->isIntegerTy(32)); - Value *atomicPtr = m_builder.CreateGEP(m_lds->getValueType(), m_lds, {m_builder.getInt32(0), ldsOffset}); + Value *atomicPtr = m_builder.CreateGEP(m_builder.getInt32Ty(), m_lds, ldsOffset); SyncScope::ID syncScope = m_builder.getContext().getOrInsertSyncScopeID("workgroup"); m_builder.CreateAtomicRMW(AtomicRMWInst::BinOp::Add, atomicPtr, ValueToAdd, MaybeAlign(), diff --git a/lgc/patch/NggPrimShader.h b/lgc/patch/NggPrimShader.h index b7fce9bc3b..4d7efed2b5 100644 --- a/lgc/patch/NggPrimShader.h +++ b/lgc/patch/NggPrimShader.h @@ -89,7 +89,7 @@ struct VertexCullInfo { // // Vertex transform feedback outputs // - unsigned xfbOutputs[4]; + unsigned xfbOutputs[4]; // At most <4 x dword> // // Vertex cull data // @@ -152,11 +152,14 @@ struct XfbOutputExport { unsigned xfbOffset; // Transform feedback offset unsigned numElements; // Number of output elements, valid range is [1,4] bool is16bit; // Whether the output is 16-bit + // For ES only + unsigned offsetInVertex; // Offset of an output within all transform feedback outputs of a vertex + // For GS only struct { unsigned streamId; // Output stream ID unsigned location; // Output location unsigned component; // Output component within a location - } locInfo; // Output location info in GS-VS ring (just for GS) + } locInfo; // Output location info in GS-VS ring }; // 
Enumerates the LDS regions used by primitive shader @@ -197,7 +200,7 @@ class NggPrimShader { public: NggPrimShader(PipelineState *pipelineState); - static unsigned calcEsGsRingItemSize(PipelineState *pipelineState); + static unsigned calcEsGsRingItemSize(PipelineState *pipelineState, llvm::Function *esMain); static PrimShaderLdsUsageInfo layoutPrimShaderLds(PipelineState *pipelineState, PrimShaderLdsLayout *ldsLayout = nullptr); @@ -208,8 +211,9 @@ class NggPrimShader { NggPrimShader(const NggPrimShader &) = delete; NggPrimShader &operator=(const NggPrimShader &) = delete; - static unsigned calcVertexCullInfoSizeAndOffsets(PipelineState *pipelineState, + static unsigned calcVertexCullInfoSizeAndOffsets(PipelineState *pipelineState, llvm::Function *esMain, VertexCullInfoOffsets &vertCullInfoOffsets); + static unsigned calcEsXfbOutputsSize(llvm::Function *target); llvm::FunctionType *getPrimShaderType(uint64_t &inRegMask); @@ -300,8 +304,8 @@ class NggPrimShader { llvm::Value *fetchXfbOutput(llvm::Function *target, llvm::ArrayRef args, llvm::SmallVector &xfbOutputExports); - llvm::Value *readXfbOutputFromLds(llvm::Type *readDataTy, llvm::Value *vertexIndex, unsigned outputIndex); - void writeXfbOutputToLds(llvm::Value *writeData, llvm::Value *vertexIndex, unsigned outputIndex); + llvm::Value *readXfbOutputFromLds(llvm::Type *readDataTy, llvm::Value *vertexIndex, unsigned offsetInVertex); + void writeXfbOutputToLds(llvm::Value *writeData, llvm::Value *vertexIndex, unsigned offsetInVertex); // Checks if NGG culling operations are enabled bool enableCulling() const { @@ -411,8 +415,8 @@ class NggPrimShader { llvm::IRBuilder<> m_builder; // LLVM IR builder - llvm::GlobalValue *m_lds = nullptr; // Global variable to model primitive shader LDS - PrimShaderLdsLayout m_ldsLayout; // Primitive shader LDS layout + llvm::Constant *m_lds = nullptr; // Global variable to model primitive shader LDS + PrimShaderLdsLayout m_ldsLayout; // Primitive shader LDS layout }; } // 
namespace lgc diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index d8ae48f12a..1c94ed3a4c 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -63,6 +63,7 @@ #include "lgc/patch/PatchWorkarounds.h" #include "lgc/patch/TcsPassthroughShader.h" #include "lgc/patch/VertexFetch.h" +#include "lgc/state/AbiMetadata.h" #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" #include "lgc/util/Debug.h" @@ -117,6 +118,9 @@ using namespace llvm; +static const char LdsGsName[] = "Lds.GS"; +static const char LdsHsName[] = "Lds.HS"; + namespace lgc { // ===================================================================================================================== @@ -144,7 +148,10 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T // that continuation transform does not support are used. passMgr.addPass(LowerGpuRt()); } else { - passMgr.addPass(LowerRaytracingPipelinePass()); + // NOTE: LowerRaytracingPipelinePass should be run before getting into LGC because we will need to collect + // metadata added by the pass. + // Optimize away the alloca's insert during lower-raytracing pipeline to avoid being put in continuation state. 
+ passMgr.addPass(createModuleToFunctionPassAdaptor(SROAPass(llvm::SROAOptions::ModifyCFG))); } addLgcContinuationTransform(passMgr); @@ -435,6 +442,7 @@ void Patch::addOptimizationPasses(lgc::PassManager &passMgr, uint32_t optLevel) .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(true) + .hoistCommonInsts(true) .sinkCommonInsts(true))); fpm.addPass(LoopUnrollPass(LoopUnrollOptions(optLevel))); fpm.addPass(SROAPass(SROAOptions::ModifyCFG)); @@ -473,25 +481,50 @@ void Patch::init(Module *module) { // // @param pipelineState : Pipeline state // @param [in/out] module : Module to get or create LDS in -GlobalVariable *Patch::getLdsVariable(PipelineState *pipelineState, Module *module) { +Constant *Patch::getLdsVariable(PipelineState *pipelineState, Function *func, bool rtStack) { + auto module = func->getParent(); auto context = &module->getContext(); - static const char *LdsName = "Lds"; // Name of LDS + auto stage = getShaderStage(func); + assert(stage && "unable to determine stage for LDS usage"); - // See if this module already has LDS. - auto oldLds = module->getNamedValue(LdsName); - if (oldLds) { - // We already have LDS. - return cast(oldLds); + unsigned hwStageMask = pipelineState->getShaderHwStageMask(*stage); + + ShaderStageEnum ldsStage; + const char *ldsName; + if (hwStageMask & Util::Abi::HwShaderGs) { + ldsName = LdsGsName; + ldsStage = ShaderStage::Geometry; + } else if (hwStageMask & Util::Abi::HwShaderHs) { + ldsName = LdsHsName; + ldsStage = ShaderStage::TessControl; + } else { + assert(false && "requesting LDS variable for unknown shader type"); + return nullptr; } - // Now we can create LDS. 
- // Construct LDS type: [ldsSize * i32], address space 3 - auto ldsSize = pipelineState->getTargetInfo().getGpuProperty().ldsSizePerThreadGroup; - auto ldsTy = ArrayType::get(Type::getInt32Ty(*context), ldsSize); - auto lds = new GlobalVariable(*module, ldsTy, false, GlobalValue::ExternalLinkage, nullptr, LdsName, nullptr, + const unsigned staticLdsSize = pipelineState->getShaderStaticLdsUsage(ldsStage, /*rtStack=*/false); + const unsigned rtLdsSize = pipelineState->getShaderStaticLdsUsage(ldsStage, /*rtStack=*/true); + const unsigned ldsSize = staticLdsSize + rtLdsSize; + + // See if module already has LDS variable. + auto oldLds = func->getParent()->getNamedValue(ldsName); + if (oldLds) + return cast(oldLds); + + // Else create LDS variable for this function. + // LDS type: [ldsSize * i32], address space 3 + const auto i32Ty = Type::getInt32Ty(*context); + const auto ldsTy = ArrayType::get(i32Ty, ldsSize); + auto lds = new GlobalVariable(*module, ldsTy, false, GlobalValue::ExternalLinkage, nullptr, Twine(ldsName), nullptr, GlobalValue::NotThreadLocal, ADDR_SPACE_LOCAL); lds->setAlignment(MaybeAlign(sizeof(unsigned))); + + if (rtStack) { + auto *offset = Constant::getIntegerValue(i32Ty, APInt(32, staticLdsSize)); + return ConstantExpr::getGetElementPtr(i32Ty, lds, offset); + } + return lds; } diff --git a/lgc/patch/PatchBufferOp.cpp b/lgc/patch/PatchBufferOp.cpp index 34c904525e..ad4f8716f7 100644 --- a/lgc/patch/PatchBufferOp.cpp +++ b/lgc/patch/PatchBufferOp.cpp @@ -1517,8 +1517,7 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { } if (isLoad) { - if (m_pipelineState.getTargetInfo().getGfxIpVersion().major >= 10 && - m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 11) { + if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 11) { // TODO For stores? 
coherent.bits.dlc = isDlc; } diff --git a/lgc/patch/PatchCheckShaderCache.cpp b/lgc/patch/PatchCheckShaderCache.cpp index bf450bbe53..e500d4b296 100644 --- a/lgc/patch/PatchCheckShaderCache.cpp +++ b/lgc/patch/PatchCheckShaderCache.cpp @@ -110,10 +110,11 @@ PreservedAnalyses PatchCheckShaderCache::run(Module &module, ModuleAnalysisManag // locations of generic outputs). We have to add it to shader hash calculation. streamMapEntries(resUsage->inOutUsage.gs.builtInOutLocs, stream); } else if (stage == ShaderStage::Mesh) { - // NOTE: For mesh shader, those two special map info (from built-in IDs to export locations of vertex/primitive - // attributes) is used to export vertex/primitive attributes. - streamMapEntries(resUsage->inOutUsage.mesh.builtInExportLocs, stream); - streamMapEntries(resUsage->inOutUsage.mesh.perPrimitiveBuiltInExportLocs, stream); + // NOTE: For mesh shader, those four special maps are used to export vertex/primitive attributes. + streamMapEntries(resUsage->inOutUsage.mesh.vertexBuiltInExportSlots, stream); + streamMapEntries(resUsage->inOutUsage.mesh.primitiveBuiltInExportSlots, stream); + streamMapEntries(resUsage->inOutUsage.mesh.vertexOutputComponents, stream); + streamMapEntries(resUsage->inOutUsage.mesh.primitiveOutputComponents, stream); } // Store the result of the hash for this shader stage. 
diff --git a/lgc/patch/PatchCopyShader.cpp b/lgc/patch/PatchCopyShader.cpp index e083ddc79a..b822fb5e94 100644 --- a/lgc/patch/PatchCopyShader.cpp +++ b/lgc/patch/PatchCopyShader.cpp @@ -157,8 +157,7 @@ PreservedAnalyses PatchCopyShader::run(Module &module, ModuleAnalysisManager &an // Set wavefront size const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::CopyShader); - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) - entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); + entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Create ending basic block, and terminate it with return. auto endBlock = BasicBlock::Create(*m_context, "", entryPoint, nullptr); @@ -181,27 +180,18 @@ PreservedAnalyses PatchCopyShader::run(Module &module, ModuleAnalysisManager &an if (!m_pipelineState->getNggControl()->enableNgg) { // If no NGG, the copy shader will become a real HW VS. Set the user data entries in the // PAL metadata here. 
- if (m_pipelineState->useRegisterFieldFormat()) { - constexpr unsigned NumUserSgprs = 32; - SmallVector userData; - userData.resize(NumUserSgprs, static_cast(UserDataMapping::Invalid)); - userData[0] = static_cast(UserDataMapping::GlobalTable); - if (m_pipelineState->enableXfb()) - userData[intfData->userDataUsage.gs.copyShaderStreamOutTable] = - static_cast(UserDataMapping::StreamOutTable); - m_pipelineState->setUserDataMap(ShaderStage::CopyShader, userData); - } else { - m_pipelineState->getPalMetadata()->setUserDataEntry(ShaderStage::CopyShader, 0, UserDataMapping::GlobalTable); - if (m_pipelineState->enableXfb()) { - m_pipelineState->getPalMetadata()->setUserDataEntry(ShaderStage::CopyShader, - intfData->userDataUsage.gs.copyShaderStreamOutTable, - UserDataMapping::StreamOutTable); - } - } + constexpr unsigned NumUserSgprs = 32; + SmallVector userData; + userData.resize(NumUserSgprs, static_cast(UserDataMapping::Invalid)); + userData[0] = static_cast(UserDataMapping::GlobalTable); + if (m_pipelineState->enableXfb()) + userData[intfData->userDataUsage.gs.copyShaderStreamOutTable] = + static_cast(UserDataMapping::StreamOutTable); + m_pipelineState->setUserDataMap(ShaderStage::CopyShader, userData); } if (m_pipelineState->isGsOnChip()) - m_lds = Patch::getLdsVariable(m_pipelineState, &module); + m_lds = Patch::getLdsVariable(m_pipelineState, entryPoint); unsigned outputStreamCount = 0; for (int i = 0; i < MaxGsStreams; ++i) { @@ -570,10 +560,10 @@ Value *PatchCopyShader::loadValueFromGsVsRing(Type *loadTy, unsigned location, u assert(m_lds); Value *ringOffset = calcGsVsRingOffsetForInput(location, component, streamId, builder); - Value *loadPtr = builder.CreateGEP(m_lds->getValueType(), m_lds, {builder.getInt32(0), ringOffset}); + Value *loadPtr = builder.CreateGEP(builder.getInt32Ty(), m_lds, ringOffset); loadPtr = builder.CreateBitCast(loadPtr, PointerType::get(loadTy, m_lds->getType()->getPointerAddressSpace())); - return builder.CreateAlignedLoad(loadTy, 
loadPtr, m_lds->getAlign()); + return builder.CreateAlignedLoad(loadTy, loadPtr, m_lds->getPointerAlignment(m_module->getDataLayout())); } CoherentFlag coherent = {}; diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index 7237eae3fb..f03f7bfb93 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -56,9 +56,11 @@ #include "lgc/patch/PatchEntryPointMutate.h" #include "ShaderMerger.h" #include "lgc/LgcContext.h" +#include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" #include "lgc/builder/BuilderImpl.h" #include "lgc/patch/ShaderInputs.h" +#include "lgc/patch/SystemValues.h" #include "lgc/state/AbiMetadata.h" #include "lgc/state/AbiUnlinked.h" #include "lgc/state/IntrinsDefs.h" @@ -160,6 +162,7 @@ PreservedAnalyses PatchEntryPointMutate::run(Module &module, ModuleAnalysisManag m_cpsShaderInputCache.clear(); processGroupMemcpy(module); + processDriverTableLoad(module); return PreservedAnalyses::none(); } @@ -237,6 +240,41 @@ static Value *mergeDwordsIntoVector(IRBuilder<> &builder, ArrayRef inpu return vec; } +// ===================================================================================================================== +void PatchEntryPointMutate::processDriverTableLoad(Module &module) { + SmallVector toBeErased; + struct Payload { + SmallVectorImpl &toBeErased; + PatchEntryPointMutate *self; + }; + Payload payload = {toBeErased, this}; + + static auto visitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](auto &payload, auto &op) { + payload.self->lowerDriverTableLoad(op); + payload.toBeErased.push_back(&op); + }) + .build(); + visitor.visit(payload, module); + for (auto call : payload.toBeErased) + call->eraseFromParent(); +} + +// ===================================================================================================================== +void 
PatchEntryPointMutate::lowerDriverTableLoad(LoadDriverTableEntryOp &loadDriverTablePtrOp) { + BuilderBase builder(&loadDriverTablePtrOp); + Function *entryPoint = loadDriverTablePtrOp.getFunction(); + builder.SetInsertPoint(&loadDriverTablePtrOp); + + PipelineSystemValues pipelineSysValues; + pipelineSysValues.initialize(m_pipelineState); + + unsigned offset = loadDriverTablePtrOp.getOffset(); + Value *desc = pipelineSysValues.get(entryPoint)->loadDescFromDriverTable(offset, builder); + loadDriverTablePtrOp.replaceAllUsesWith(desc); +} + // ===================================================================================================================== // Lower GroupMemcpyOp void PatchEntryPointMutate::processGroupMemcpy(Module &module) { @@ -445,14 +483,16 @@ void PatchEntryPointMutate::lowerGroupMemcpy(GroupMemcpyOp &groupMemcpyOp) { void PatchEntryPointMutate::lowerAsCpsReference(cps::AsContinuationReferenceOp &asCpsReferenceOp) { BuilderBase builder(&asCpsReferenceOp); - Value *ref = nullptr; - Function &callee = cast(*asCpsReferenceOp.getFn()); - auto level = cps::getCpsLevelFromFunction(callee); + Value *reloc = nullptr; + Function &callee = *cast(asCpsReferenceOp.getFn()); + + Value *loweredReference = lgc::cps::lowerAsContinuationReference(builder, asCpsReferenceOp, reloc); - { ref = builder.CreatePtrToInt(&callee, builder.getInt32Ty()); } - ref = builder.CreateAdd(ref, builder.getInt32(static_cast(level))); + loweredReference = + builder.CreateAdd(loweredReference, builder.getIntN(loweredReference->getType()->getScalarSizeInBits(), + static_cast(cps::getCpsLevelFromFunction(callee)))); - asCpsReferenceOp.replaceAllUsesWith(ref); + asCpsReferenceOp.replaceAllUsesWith(loweredReference); } // ===================================================================================================================== @@ -511,8 +551,8 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu numUserdata = argTys.size() - 1; } else { 
numShaderArg = m_cpsShaderInputCache.getTypes().size(); - assert(haveLocalInvocationId == (m_cpsShaderInputCache.getNames().back() == "LocalInvocationId")); numUserdata = haveLocalInvocationId ? numShaderArg - 1 : numShaderArg; + assert(haveLocalInvocationId == (m_cpsShaderInputCache.getNames().back() == "LocalInvocationId")); } // Get all the return instructions. @@ -572,10 +612,10 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu // Packing VGPR arguments. Value *vgprArg = mergeIntoStruct(builder, newVgpr); - // Packing SGPR arguments (user data) into vector of i32s. - SmallVector userData; + // Packing SGPR arguments (user data + internal used SGPRs) into vector of i32s. + SmallVector sgprArgs; for (unsigned idx = 0; idx != numUserdata; ++idx) - userData.push_back(func->getArg(idx)); + sgprArgs.push_back(func->getArg(idx)); // tail: // Merge vgpr values from different exits. @@ -641,11 +681,11 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu Value *jumpTarget = addressExtender.extend(addr32, builder.getInt32(HighAddrPc), builder.getPtrTy(), builder); const DataLayout &layout = func->getParent()->getDataLayout(); - SmallVector userDataI32; - splitIntoI32(layout, builder, userData, userDataI32); - Value *userDataVec = mergeDwordsIntoVector(builder, userDataI32); + SmallVector sgprI32; + splitIntoI32(layout, builder, sgprArgs, sgprI32); + Value *sgprVec = mergeDwordsIntoVector(builder, sgprI32); - SmallVector chainArgs = {jumpTarget, execMask, userDataVec, vgprArg}; + SmallVector chainArgs = {jumpTarget, execMask, sgprVec, vgprArg}; { // No flags @@ -654,7 +694,7 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 465197 // Old version of the code - SmallVector chainArgTys = {builder.getPtrTy(), builder.getIntNTy(waveSize), userDataVec->getType(), + SmallVector chainArgTys = {builder.getPtrTy(), 
builder.getIntNTy(waveSize), sgprVec->getType(), vgprArg->getType(), builder.getInt32Ty()}; FunctionType *chainFuncTy = FunctionType::get(builder.getVoidTy(), chainArgTys, true); @@ -664,7 +704,7 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu #else // New version of the code (also handles unknown version, which we treat as // latest) - Type *chainTys[] = {builder.getPtrTy(), builder.getIntNTy(waveSize), userDataVec->getType(), vgprArg->getType()}; + Type *chainTys[] = {builder.getPtrTy(), builder.getIntNTy(waveSize), sgprVec->getType(), vgprArg->getType()}; auto *chainCall = builder.CreateIntrinsic(Intrinsic::amdgcn_cs_chain, chainTys, chainArgs); // Add inreg attribute for (fn, exec, sgprs). for (unsigned arg = 0; arg < 3; arg++) @@ -675,7 +715,8 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu auto *doc = m_pipelineState->getPalMetadata()->getDocument(); auto funcName = doc->getNode(func->getName(), /*copy=*/true); // Lower cps stack operations - stackLowering->lowerCpsStackOps(*func, m_funcCpsStackMap[func]); + Value *cspStorage = m_funcCpsStackMap[func]; + stackLowering->lowerCpsStackOps(func, nullptr, false, cspStorage); stackSize += stackLowering->getStackSizeInBytes(); // Set per-function .frontend_stack_size PAL metadata. 
@@ -722,7 +763,8 @@ Function *PatchEntryPointMutate::lowerCpsFunction(Function *func, ArrayRefgetContext(), Attribute::InReg); bool haveLocalInvocationId = !m_pipelineState->getShaderModes()->getComputeShaderMode().noLocalInvocationIdInCalls; - assert(haveLocalInvocationId == (argNames.back() == "LocalInvocationId")); + assert(haveLocalInvocationId == (argNames.back() == "LocalInvocationId") || + (argNames[argNames.size() - 2] == "LocalInvocationId")); AttributeList oldAttrs = func->getAttributes(); SmallVector argAttrs; @@ -945,7 +987,7 @@ void PatchEntryPointMutate::gatherUserDataUsage(Module *module) { if (haveDynamicUser) { userDataUsage->haveDynamicUserDataLoads = true; - self.m_pipelineState->getPalMetadata()->setUserDataSpillUsage(op.getOffset() / 4); + self.m_pipelineState->getPalMetadata()->setUserDataSpillUsage(op.getOffset() / 4, stage); } }) .add([](PatchEntryPointMutate &self, LoadUserDataOp &op) { @@ -1227,6 +1269,8 @@ void PatchEntryPointMutate::processComputeFuncs(ShaderInputs *shaderInputs, Modu // Create the new function and transfer code and attributes to it. Function *newFunc = nullptr; // For continufy based ray-tracing, we still need to add shader inputs like workgroupId and LocalInvocationId. + // TODO: All codes related to noLocalInvocationIdInCalls should be removed once we don't pass LocalInvocationId in + // legacy/continufy RT any more. 
bool haveLocalInvocationIdInCalls = !m_pipelineState->getShaderModes()->getComputeShaderMode().noLocalInvocationIdInCalls; if (cps::isCpsFunction(*origFunc)) { @@ -1375,20 +1419,16 @@ void PatchEntryPointMutate::setFuncAttrs(Function *entryPoint) { bool hasColorExport = false; // SpiShaderColFormat / mmSPI_SHADER_COL_FORMAT is used for fully compiled shaders unsigned colFormat = EXP_FORMAT_ZERO; - if (m_pipelineState->useRegisterFieldFormat()) { - auto &colFormatNode = m_pipelineState->getPalMetadata() - ->getPipelineNode() - .getMap(true)[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderColFormat] - .getMap(true); - for (auto iter = colFormatNode.begin(); iter != colFormatNode.end(); ++iter) { - if (iter->second.getUInt() != EXP_FORMAT_ZERO) { - colFormat = iter->second.getUInt(); - break; - } + auto &colFormatNode = m_pipelineState->getPalMetadata() + ->getPipelineNode() + .getMap(true)[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderColFormat] + .getMap(true); + for (auto iter = colFormatNode.begin(); iter != colFormatNode.end(); ++iter) { + if (iter->second.getUInt() != EXP_FORMAT_ZERO) { + colFormat = iter->second.getUInt(); + break; } - } else { - colFormat = m_pipelineState->getPalMetadata()->getRegister(mmSPI_SHADER_COL_FORMAT); } if (colFormat != EXP_FORMAT_ZERO) hasColorExport = true; @@ -1557,12 +1597,8 @@ uint64_t PatchEntryPointMutate::generateEntryPointArgTys(ShaderInputs *shaderInp if (userDataArg.userDataValue != static_cast(UserDataMapping::Invalid)) { // Most of user data metadata entries is 1 except for root push descriptors. bool isSystemUserData = isSystemUserDataValue(userDataArg.userDataValue); - unsigned numEntries = isSystemUserData ? 1 : dwordSize; assert((!isUnlinkedDescriptorSetValue(userDataArg.userDataValue) || dwordSize == 1) && "Expecting descriptor set values to be one dword. 
The linker cannot handle anything else."); - if (!m_pipelineState->useRegisterFieldFormat()) - m_pipelineState->getPalMetadata()->setUserDataEntry(m_shaderStage, userDataIdx, userDataArg.userDataValue, - numEntries); if (isSystemUserData) { unsigned index = userDataArg.userDataValue - static_cast(UserDataMapping::GlobalTable); auto &specialUserData = getUserDataUsage(m_shaderStage)->specialUserData; @@ -1602,7 +1638,7 @@ uint64_t PatchEntryPointMutate::generateEntryPointArgTys(ShaderInputs *shaderInp inRegMask |= shaderInputs->getShaderArgTys(m_pipelineState, m_shaderStage, origFunc, m_computeWithCalls, argTys, argNames, argOffset); - if (updateUserDataMap && m_pipelineState->useRegisterFieldFormat()) { + if (updateUserDataMap) { constexpr unsigned NumUserSgprs = 32; constexpr unsigned InvalidMapVal = static_cast(UserDataMapping::Invalid); SmallVector userDataMap; @@ -1914,13 +1950,13 @@ void PatchEntryPointMutate::finalizeUserDataArgs(SmallVectorImpl &u userDataArgs.emplace_back(builder.getInt32Ty(), "pad" + Twine(i)); } if (userDataSgprs < userDataDwords) - m_pipelineState->getPalMetadata()->setUserDataSpillUsage(userDataSgprs); + m_pipelineState->getPalMetadata()->setUserDataSpillUsage(userDataSgprs, m_shaderStage); // We must conservatively assume that there are functions with dynamic push constant accesses, and that therefore // the push constants must be fully available in the spill region even if they fit (partially) into SGPRs. const ResourceNode *node = m_pipelineState->findSingleRootResourceNode(ResourceNodeType::PushConst, m_shaderStage); if (node) - m_pipelineState->getPalMetadata()->setUserDataSpillUsage(node->offsetInDwords); + m_pipelineState->getPalMetadata()->setUserDataSpillUsage(node->offsetInDwords, m_shaderStage); } else { // Greedily fit as many generic user data arguments as possible. // Pre-allocate entryArgIdxs since we rely on stable pointers. 
@@ -1946,14 +1982,14 @@ void PatchEntryPointMutate::finalizeUserDataArgs(SmallVectorImpl &u userDataArgs.erase(userDataArgs.end() - lastSize, userDataArgs.end()); userDataEnd -= lastSize; assert(userDataEnd <= userDataAvailable); - m_pipelineState->getPalMetadata()->setUserDataSpillUsage(lastIdx); + m_pipelineState->getPalMetadata()->setUserDataSpillUsage(lastIdx, m_shaderStage); // Retry since the current load may now fit. continue; } } - m_pipelineState->getPalMetadata()->setUserDataSpillUsage(i); + m_pipelineState->getPalMetadata()->setUserDataSpillUsage(i, m_shaderStage); if (userDataEnd >= userDataAvailable) break; // All SGPRs in use, may as well give up. diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp index 704f1fdedf..3fbfda80e3 100644 --- a/lgc/patch/PatchInOutImportExport.cpp +++ b/lgc/patch/PatchInOutImportExport.cpp @@ -30,7 +30,6 @@ *********************************************************************************************************************** */ #include "lgc/patch/PatchInOutImportExport.h" -#include "Gfx9Chip.h" #include "lgc/Builder.h" #include "lgc/BuiltIns.h" #include "lgc/LgcDialect.h" @@ -51,8 +50,11 @@ using namespace lgc; namespace lgc { +// Preferred number of HS threads per subgroup. +constexpr unsigned MaxHsThreadsPerSubgroup = 256; + // ===================================================================================================================== -PatchInOutImportExport::PatchInOutImportExport() : m_lds(nullptr) { +PatchInOutImportExport::PatchInOutImportExport() { memset(&m_gfxIp, 0, sizeof(m_gfxIp)); initPerShader(); } @@ -110,11 +112,6 @@ PreservedAnalyses PatchInOutImportExport::run(Module &module, ModuleAnalysisMana otherCallees.push_back(&func); } - // Create the global variable that is to model LDS - // NOTE: ES -> GS ring is always on-chip on GFX10+. 
- if (m_hasTs || m_hasGs) - m_lds = Patch::getLdsVariable(m_pipelineState, m_module); - // Set buffer formats based on specific GFX static const std::array BufferFormatsGfx10 = { BUF_FORMAT_32_FLOAT, @@ -376,7 +373,7 @@ void PatchInOutImportExport::processShader() { calcFactor.onChip.specialTfValueStart = calcFactor.onChip.hsPatchCountStart + 1; const unsigned maxNumHsWaves = - Gfx9::MaxHsThreadsPerSubgroup / m_pipelineState->getMergedShaderWaveSize(ShaderStage::TessControl); + MaxHsThreadsPerSubgroup / m_pipelineState->getMergedShaderWaveSize(ShaderStage::TessControl); calcFactor.specialTfValueSize = maxNumHsWaves * 2; calcFactor.tessOnChipLdsSize += 1 + calcFactor.specialTfValueSize; @@ -1526,13 +1523,11 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_s_barrier, {}); builder.CreateFence(AtomicOrdering::Acquire, syncScope); } else if (m_shaderStage == ShaderStage::Geometry) { - if (m_gfxIp.major >= 10) { - // NOTE: Per programming guide, we should do a "s_waitcnt 0,0,0 + s_waitcnt_vscnt 0" before issuing a "done", so - // we use fence release to generate s_waitcnt vmcnt lgkmcnt/s_waitcnt_vscnt before s_sendmsg(MSG_GS_DONE) - SyncScope::ID scope = - m_pipelineState->isGsOnChip() ? m_context->getOrInsertSyncScopeID("workgroup") : SyncScope::System; - builder.CreateFence(AtomicOrdering::Release, scope); - } + // NOTE: Per programming guide, we should do a "s_waitcnt 0,0,0 + s_waitcnt_vscnt 0" before issuing a "done", so + // we use fence release to generate s_waitcnt vmcnt lgkmcnt/s_waitcnt_vscnt before s_sendmsg(MSG_GS_DONE) + SyncScope::ID scope = + m_pipelineState->isGsOnChip() ? 
m_context->getOrInsertSyncScopeID("workgroup") : SyncScope::System; + builder.CreateFence(AtomicOrdering::Release, scope); auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Geometry)->entryArgIdxs.gs; auto gsWaveId = getFunctionArgument(m_entryPoint, entryArgIdxs.gsWaveId); @@ -2067,21 +2062,10 @@ void PatchInOutImportExport::patchGsGenericOutputExport(Value *output, unsigned void PatchInOutImportExport::patchMeshGenericOutputExport(Value *output, unsigned location, Value *locOffset, Value *compIdx, Value *vertexOrPrimitiveIdx, bool isPerPrimitive, BuilderBase &builder) { - // outputOffset = (location + locOffset) * 4 + compIdx * (bitWidth == 64 ? 2 : 1) - Value *outputOffset = builder.CreateAdd(builder.getInt32(location), locOffset); - outputOffset = builder.CreateShl(outputOffset, 2); - - auto outputTy = output->getType(); - if (outputTy->getScalarSizeInBits() == 64) { + if (output->getType()->getScalarSizeInBits() == 64) compIdx = builder.CreateShl(compIdx, 1); - } - - outputOffset = builder.CreateAdd(outputOffset, compIdx); - if (isPerPrimitive) - builder.create(outputOffset, vertexOrPrimitiveIdx, output); - else - builder.create(outputOffset, vertexOrPrimitiveIdx, output); + builder.create(isPerPrimitive, location, locOffset, compIdx, vertexOrPrimitiveIdx, output); } // ===================================================================================================================== @@ -2447,7 +2431,8 @@ Value *PatchInOutImportExport::patchFsBuiltInInputImport(Type *inputTy, unsigned // There is a special case when vkCreateGraphicsPipelines but not set sampleRate, but compiling shader // will ask to set runAtSampleRate, this case is valid but current code will cause hang. // So in this case, it will not use broadcast sample mask. 
- if (m_pipelineState->getRasterizerState().perSampleShading || builtInUsage.runAtSampleRate) { + if (!m_pipelineState->getOptions().disableSampleCoverageAdjust && + (m_pipelineState->getRasterizerState().perSampleShading || builtInUsage.runAtSampleRate)) { unsigned baseMask = 1; if (!builtInUsage.sampleId) { if (m_pipelineState->getRasterizerState().pixelShaderSamples != 0) { @@ -3403,15 +3388,10 @@ void PatchInOutImportExport::patchMeshBuiltInOutputExport(Value *output, unsigne (void(builtInUsage)); // Unused - // outputOffset = location * 4 + elemIdx - Value *outputOffset = builder.getInt32(4 * loc); - if (elemIdx) - outputOffset = builder.CreateAdd(builder.getInt32(4 * loc), elemIdx); + if (!elemIdx) + elemIdx = builder.getInt32(0); - if (isPerPrimitive) - builder.create(outputOffset, vertexOrPrimitiveIdx, output); - else - builder.create(outputOffset, vertexOrPrimitiveIdx, output); + builder.create(isPerPrimitive, loc, builder.getInt32(0), elemIdx, vertexOrPrimitiveIdx, output); } // ===================================================================================================================== @@ -3859,10 +3839,9 @@ void PatchInOutImportExport::storeValueToEsGsRing(Value *storeValue, unsigned lo auto ringOffset = calcEsGsRingOffsetForOutput(location, compIdx, esGsOffset, builder); // ES -> GS ring is always on-chip on GFX10+ - Value *idxs[] = {builder.getInt32(0), ringOffset}; - auto ldsType = m_lds->getValueType(); - Value *storePtr = builder.CreateGEP(ldsType, m_lds, idxs); - builder.CreateAlignedStore(storeValue, storePtr, m_lds->getAlign().value()); + auto lds = Patch::getLdsVariable(m_pipelineState, m_entryPoint); + Value *storePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ringOffset); + builder.CreateAlignedStore(storeValue, storePtr, lds->getPointerAlignment(m_module->getDataLayout())); } } @@ -3905,10 +3884,9 @@ Value *PatchInOutImportExport::loadValueFromEsGsRing(Type *loadTy, unsigned loca } else { Value *ringOffset = 
calcEsGsRingOffsetForInput(location, compIdx, vertexIdx, builder); // ES -> GS ring is always on-chip on GFX10+ - Value *idxs[] = {builder.getInt32(0), ringOffset}; - auto ldsType = m_lds->getValueType(); - auto *loadPtr = builder.CreateGEP(ldsType, m_lds, idxs); - loadValue = builder.CreateAlignedLoad(loadTy, loadPtr, m_lds->getAlign().value()); + auto lds = Patch::getLdsVariable(m_pipelineState, m_entryPoint); + auto *loadPtr = builder.CreateGEP(builder.getInt32Ty(), lds, ringOffset); + loadValue = builder.CreateAlignedLoad(loadTy, loadPtr, lds->getPointerAlignment(m_module->getDataLayout())); } return loadValue; @@ -3989,10 +3967,9 @@ void PatchInOutImportExport::storeValueToGsVsRing(Value *storeValue, unsigned lo auto ringOffset = calcGsVsRingOffsetForOutput(location, compIdx, streamId, emitCounter, gsVsOffset, builder); if (m_pipelineState->isGsOnChip()) { - Value *idxs[] = {builder.getInt32(0), ringOffset}; - auto ldsType = m_lds->getValueType(); - Value *storePtr = builder.CreateGEP(ldsType, m_lds, idxs); - builder.CreateAlignedStore(storeValue, storePtr, m_lds->getAlign().value()); + auto lds = Patch::getLdsVariable(m_pipelineState, m_entryPoint); + Value *storePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ringOffset); + builder.CreateAlignedStore(storeValue, storePtr, lds->getPointerAlignment(m_module->getDataLayout())); } else { // NOTE: Here we use tbuffer_store instruction instead of buffer_store because we have to do explicit // control of soffset. 
This is required by swizzle enabled mode when address range checking should be @@ -4135,7 +4112,6 @@ Value *PatchInOutImportExport::calcGsVsRingOffsetForOutput(unsigned location, un // @param ldsOffset : Start offset to do LDS read operations // @param builder : The IR builder to create and insert IR instruction Value *PatchInOutImportExport::readValueFromLds(bool offChip, Type *readTy, Value *ldsOffset, BuilderBase &builder) { - assert(m_lds); assert(readTy->isSingleValueType()); // Read dwords from LDS @@ -4177,10 +4153,9 @@ Value *PatchInOutImportExport::readValueFromLds(bool offChip, Type *readTy, Valu } else { // Read from on-chip LDS for (unsigned i = 0; i < numChannels; ++i) { - Value *idxs[] = {builder.getInt32(0), ldsOffset}; - auto ldsType = m_lds->getValueType(); - auto *loadPtr = builder.CreateGEP(ldsType, m_lds, idxs); - auto loadTy = GetElementPtrInst::getIndexedType(ldsType, idxs); + auto loadTy = builder.getInt32Ty(); + auto lds = Patch::getLdsVariable(m_pipelineState, m_entryPoint); + auto *loadPtr = builder.CreateGEP(loadTy, lds, ldsOffset); loadValues[i] = builder.CreateLoad(loadTy, loadPtr); ldsOffset = builder.CreateAdd(ldsOffset, builder.getInt32(1)); @@ -4219,8 +4194,6 @@ Value *PatchInOutImportExport::readValueFromLds(bool offChip, Type *readTy, Valu // @param ldsOffset : Start offset to do LDS write operations // @param builder : The IR builder to create and insert IR instruction void PatchInOutImportExport::writeValueToLds(bool offChip, Value *writeValue, Value *ldsOffset, BuilderBase &builder) { - assert(m_lds); - auto writeTy = writeValue->getType(); assert(writeTy->isSingleValueType()); @@ -4270,9 +4243,8 @@ void PatchInOutImportExport::writeValueToLds(bool offChip, Value *writeValue, Va } else { // Write to on-chip LDS for (unsigned i = 0; i < numChannels; ++i) { - Value *idxs[] = {builder.getInt32(0), ldsOffset}; - auto ldsType = m_lds->getValueType(); - Value *storePtr = builder.CreateGEP(ldsType, m_lds, idxs); + auto lds = 
Patch::getLdsVariable(m_pipelineState, m_entryPoint); + Value *storePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ldsOffset); builder.CreateStore(storeValues[i], storePtr); ldsOffset = builder.CreateAdd(ldsOffset, builder.getInt32(1)); @@ -4525,7 +4497,7 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC unsigned outVertexCount, unsigned outVertexStride, unsigned patchConstCount, unsigned tessFactorStride) const { - unsigned maxThreadCountPerThreadGroup = Gfx9::MaxHsThreadsPerSubgroup; + unsigned maxThreadCountPerThreadGroup = MaxHsThreadsPerSubgroup; // NOTE: If ray query uses LDS stack, the expected max thread count in the group is 64. And we force wave size // to be 64 in order to keep all threads in the same wave. In the future, we could consider to get rid of this @@ -4555,7 +4527,7 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC // count actual HS patches. assert(m_gfxIp.major >= 11); const unsigned maxNumHsWaves = - Gfx9::MaxHsThreadsPerSubgroup / m_pipelineState->getMergedShaderWaveSize(ShaderStage::TessControl); + MaxHsThreadsPerSubgroup / m_pipelineState->getMergedShaderWaveSize(ShaderStage::TessControl); ldsSizePerThreadGroup -= 1 + maxNumHsWaves * 2; } ldsSizePerThreadGroup -= rayQueryLdsStackSize; // Exclude LDS space used as ray query stack diff --git a/lgc/patch/PatchInitializeWorkgroupMemory.cpp b/lgc/patch/PatchInitializeWorkgroupMemory.cpp index 7e60b158d3..fd81de41e9 100644 --- a/lgc/patch/PatchInitializeWorkgroupMemory.cpp +++ b/lgc/patch/PatchInitializeWorkgroupMemory.cpp @@ -103,7 +103,7 @@ PreservedAnalyses PatchInitializeWorkgroupMemory::run(Module &module, ModuleAnal GlobalVariable *global = globalOffsetPair.first; Value *offset = globalOffsetPair.second; - Value *pointer = builder.CreateGEP(lds->getValueType(), lds, {builder.getInt32(0), offset}); + Value *pointer = builder.CreateGEP(builder.getInt32Ty(), lds, offset); pointer = builder.CreateBitCast(pointer, 
global->getType()); global->replaceAllUsesWith(pointer); @@ -213,7 +213,7 @@ void PatchInitializeWorkgroupMemory::initializeWithZero(GlobalVariable *lds, Bui // ldsOffset = (threadId * loopCount) + loopIdx Value *ldsOffset = builder.CreateMul(threadId, loopCount); ldsOffset = builder.CreateAdd(ldsOffset, loopIdxPhi); - Value *writePtr = builder.CreateGEP(lds->getValueType(), lds, {builder.getInt32(0), ldsOffset}); + Value *writePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ldsOffset); builder.CreateAlignedStore(builder.getInt32(0), writePtr, Align(4)); // Update loop index diff --git a/lgc/patch/PatchInvariantLoads.cpp b/lgc/patch/PatchInvariantLoads.cpp index d05ba4ebff..2890046d90 100644 --- a/lgc/patch/PatchInvariantLoads.cpp +++ b/lgc/patch/PatchInvariantLoads.cpp @@ -98,7 +98,7 @@ PreservedAnalyses PatchInvariantLoads::run(Function &function, FunctionAnalysisM bool clearInvariants = options.aggressiveInvariantLoads == ClearInvariants; bool aggressiveInvariants = options.aggressiveInvariantLoads == EnableOptimization; - if (options.aggressiveInvariantLoads == Auto && pipelineState->getTargetInfo().getGfxIpVersion().major >= 10) { + if (options.aggressiveInvariantLoads == Auto) { switch (function.getCallingConv()) { case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_LS: diff --git a/lgc/patch/PatchPreparePipelineAbi.cpp b/lgc/patch/PatchPreparePipelineAbi.cpp index 479a5f871d..a42f59cc22 100644 --- a/lgc/patch/PatchPreparePipelineAbi.cpp +++ b/lgc/patch/PatchPreparePipelineAbi.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ #include "lgc/patch/PatchPreparePipelineAbi.h" -#include "Gfx9ConfigBuilder.h" #include "MeshTaskShader.h" #include "RegisterMetadataBuilder.h" #include "ShaderMerger.h" @@ -110,14 +109,14 @@ PreservedAnalyses PatchPreparePipelineAbi::run(Module &module, ModuleAnalysisMan // @param builder : IR builder to insert instructions std::pair 
PatchPreparePipelineAbi::readTessFactors(PipelineState *pipelineState, Value *relPatchId, IRBuilder<> &builder) { - auto module = builder.GetInsertBlock()->getModule(); - auto lds = Patch::getLdsVariable(pipelineState, module); + auto func = builder.GetInsertBlock()->getParent(); + auto lds = Patch::getLdsVariable(pipelineState, func); // Helper to read value from LDS auto readValueFromLds = [&](Type *readTy, Value *ldsOffset) { assert(readTy->getScalarSizeInBits() == 32); // Only accept 32-bit data - Value *readPtr = builder.CreateGEP(lds->getValueType(), lds, {builder.getInt32(0), ldsOffset}); + Value *readPtr = builder.CreateGEP(builder.getInt32Ty(), lds, ldsOffset); readPtr = builder.CreateBitCast(readPtr, PointerType::get(readTy, readPtr->getType()->getPointerAddressSpace())); return builder.CreateAlignedLoad(readTy, readPtr, Align(4)); }; @@ -229,14 +228,14 @@ void PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val if (primitiveMode == PrimitiveMode::Isolines) { assert(numOuterTfs == 2 && numInnerTfs == 0); - auto callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), - {outerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX2), // format - builder.getInt32(coherent.u32All)}); // glc - (void)callInst; + builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), + {outerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX2), // format + builder.getInt32(coherent.u32All)}); // glc + } else if (primitiveMode == PrimitiveMode::Triangles) { assert(numOuterTfs == 3 && numInnerTfs == 1); @@ -245,35 +244,33 @@ void PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val tessFactor = builder.CreateInsertElement(tessFactor, builder.CreateExtractElement(innerTf, static_cast(0)), 3); - auto callInst = 
builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, tessFactor->getType(), - {tessFactor, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX4), // format - builder.getInt32(coherent.u32All)}); // glc - (void)callInst; + builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, tessFactor->getType(), + {tessFactor, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX4), // format + builder.getInt32(coherent.u32All)}); // glc } else { assert(primitiveMode == PrimitiveMode::Quads); assert(numOuterTfs == 4 && numInnerTfs == 2); - auto callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), - {outerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX4), // format - builder.getInt32(coherent.u32All)}); // glc + builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, outerTf->getType(), + {outerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX4), // format + builder.getInt32(coherent.u32All)}); // glc tfBufferOffset = builder.CreateAdd(tfBufferOffset, builder.getInt32(4 * sizeof(float))); - callInst = builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, innerTf->getType(), - {innerTf, // vdata - tfBufferDesc, // rsrc - tfBufferOffset, // voffset - tfBufferBase, // soffset - builder.getInt32(bufferFormatX2), // format - builder.getInt32(coherent.u32All)}); // glc - (void)callInst; + builder.CreateIntrinsic(Intrinsic::amdgcn_raw_tbuffer_store, innerTf->getType(), + {innerTf, // vdata + tfBufferDesc, // rsrc + tfBufferOffset, // voffset + tfBufferBase, // soffset + builder.getInt32(bufferFormatX2), // format + builder.getInt32(coherent.u32All)}); // glc } } @@ -282,8 +279,6 @@ void 
PatchPreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Val // // @param module : LLVM module void PatchPreparePipelineAbi::mergeShader(Module &module) { - assert(m_gfxIp.major >= 10); - const bool hasTs = (m_hasTcs || m_hasTes); if (m_pipelineState->isGraphics()) { @@ -425,13 +420,8 @@ void PatchPreparePipelineAbi::setAbiEntryNames(Module &module) { // // @param module : LLVM module void PatchPreparePipelineAbi::addAbiMetadata(Module &module) { - if (m_pipelineState->useRegisterFieldFormat()) { - Gfx9::RegisterMetadataBuilder regMetadataBuilder(&module, m_pipelineState, m_pipelineShaders); - regMetadataBuilder.buildPalMetadata(); - } else { - Gfx9::ConfigBuilder configBuilder(&module, m_pipelineState); - configBuilder.buildPalMetadata(); - } + RegisterMetadataBuilder regMetadataBuilder(&module, m_pipelineState, m_pipelineShaders); + regMetadataBuilder.buildPalMetadata(); } // ===================================================================================================================== diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index fb8a9f5eca..55f1c0f3c7 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ #include "lgc/patch/PatchResourceCollect.h" -#include "Gfx9Chip.h" #include "MeshTaskShader.h" #include "NggPrimShader.h" #include "lgc/Builder.h" @@ -59,6 +58,15 @@ cl::opt DisableGsOnChip("disable-gs-onchip", cl::desc("Disable geometry sh namespace lgc { +// Max size of primitives per subgroup for adjacency primitives or when GS instancing is used. This restriction is +// applicable only when onchip GS is used. 
+constexpr unsigned OnChipGsMaxPrimPerSubgroup = 255; +constexpr unsigned OnChipGsMaxPrimPerSubgroupAdj = 127; +constexpr unsigned OnChipGsMaxEsVertsPerSubgroup = 255; + +// Default value for the maximum LDS size per GS subgroup, in dword's. +constexpr unsigned DefaultLdsSizePerSubgroup = 8192; + // ===================================================================================================================== PatchResourceCollect::PatchResourceCollect() : m_resUsage(nullptr) { } @@ -150,7 +158,6 @@ PreservedAnalyses PatchResourceCollect::run(Module &module, ModuleAnalysisManage // @param [in/out] module : Module void PatchResourceCollect::setNggControl(Module *module) { assert(m_pipelineState->isGraphics()); - assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); // If mesh pipeline, skip NGG control settings const bool meshPipeline = m_pipelineState->hasShaderStage(ShaderStage::Task) || m_pipelineState->hasShaderStage(ShaderStage::Mesh); @@ -192,8 +199,8 @@ void PatchResourceCollect::setNggControl(Module *module) { nggControl.backfaceExponent = options.nggBackfaceExponent; nggControl.subgroupSizing = options.nggSubgroupSizing; - nggControl.primsPerSubgroup = std::min(options.nggPrimsPerSubgroup, Gfx9::NggMaxThreadsPerSubgroup); - nggControl.vertsPerSubgroup = std::min(options.nggVertsPerSubgroup, Gfx9::NggMaxThreadsPerSubgroup); + nggControl.primsPerSubgroup = std::min(options.nggPrimsPerSubgroup, NggMaxThreadsPerSubgroup); + nggControl.vertsPerSubgroup = std::min(options.nggVertsPerSubgroup, NggMaxThreadsPerSubgroup); if (nggControl.enableNgg) { if (options.nggFlags & NggFlagForceCullingMode) @@ -259,7 +266,6 @@ void PatchResourceCollect::setNggControl(Module *module) { // @param [in/out] module : Module bool PatchResourceCollect::canUseNgg(Module *module) { assert(m_pipelineState->isGraphics()); - assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); // Always enable NGG for GFX11+ if 
(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) @@ -289,7 +295,7 @@ bool PatchResourceCollect::canUseNgg(Module *module) { // NGG subgroup is implicitly 3 (specified by HW). Thus, the maximum primitive amplification factor is therefore // 256/3 = 85. if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx10.waLimitedMaxOutputVertexCount) { - static const unsigned MaxOutputVertices = Gfx9::NggMaxThreadsPerSubgroup / 3; + static const unsigned MaxOutputVertices = NggMaxThreadsPerSubgroup / 3; if (geometryMode.outputVertices > MaxOutputVertices) return false; } @@ -297,7 +303,7 @@ bool PatchResourceCollect::canUseNgg(Module *module) { // NOTE: On GFX10, the bit VGT_GS_INSTANCE_CNT.EN_MAX_VERT_OUT_PER_GS_INSTANCE provided by HW allows each GS // instance to emit maximum vertices (256). But this mode is not supported when tessellation is enabled. if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx10.waGeNggMaxVertOutWithGsInstancing) { - if (geometryMode.invocations * geometryMode.outputVertices > Gfx9::NggMaxThreadsPerSubgroup) + if (geometryMode.invocations * geometryMode.outputVertices > NggMaxThreadsPerSubgroup) return false; } } @@ -312,7 +318,6 @@ bool PatchResourceCollect::canUseNgg(Module *module) { // @param [in/out] module : Module bool PatchResourceCollect::canUseNggCulling(Module *module) { assert(m_pipelineState->isGraphics()); - assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); const bool hasTs = m_pipelineState->hasShaderStage(ShaderStage::TessControl) || m_pipelineState->hasShaderStage(ShaderStage::TessEval); @@ -498,11 +503,8 @@ bool PatchResourceCollect::checkGsOnChipValidity() { const unsigned numMeshThreads = meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ; unsigned primAmpFactor = std::max(numMeshThreads, std::max(meshMode.outputVertices, meshMode.outputPrimitives)); - const unsigned ldsSizeDwordGranularity = - 1u << 
m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - auto ldsSizeDwords = + const unsigned ldsSizeDwords = MeshTaskShader::layoutMeshShaderLds(m_pipelineState, m_pipelineShaders->getEntryPoint(ShaderStage::Mesh)); - ldsSizeDwords = alignTo(ldsSizeDwords, ldsSizeDwordGranularity); // Make sure we don't allocate more than what can legally be allocated by a single subgroup on the hardware. unsigned maxHwGsLdsSizeDwords = m_pipelineState->getTargetInfo().getGpuProperty().gsOnChipMaxLdsSize; @@ -521,7 +523,9 @@ bool PatchResourceCollect::checkGsOnChipValidity() { gsOnChip = true; // For mesh shader, GS is always on-chip } else if (nggControl->enableNgg) { - unsigned esGsRingItemSize = NggPrimShader::calcEsGsRingItemSize(m_pipelineState); // In dwords + unsigned esGsRingItemSize = NggPrimShader::calcEsGsRingItemSize( + m_pipelineState, + m_pipelineShaders->getEntryPoint(hasTs ? ShaderStage::TessEval : ShaderStage::Vertex)); // In dwords const unsigned gsVsRingItemSize = hasGs ? std::max(1u, 4 * gsResUsage->inOutUsage.outputMapLocCount * geometryMode.outputVertices) : 0; @@ -542,15 +546,15 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // The numbers below come from hardware guidance and most likely require further tuning. switch (nggControl->subgroupSizing) { case NggSubgroupSizing::HalfSize: - esVertsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2; - gsPrimsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2; + esVertsPerSubgroup = NggMaxThreadsPerSubgroup / 2; + gsPrimsPerSubgroup = NggMaxThreadsPerSubgroup / 2; break; case NggSubgroupSizing::OptimizeForVerts: - esVertsPerSubgroup = hasTs ? Gfx9::NggMaxThreadsPerSubgroup / 2 : (Gfx9::NggMaxThreadsPerSubgroup / 2 - 2); - gsPrimsPerSubgroup = hasTs || needsLds ? 192 : Gfx9::NggMaxThreadsPerSubgroup; + esVertsPerSubgroup = hasTs ? NggMaxThreadsPerSubgroup / 2 : (NggMaxThreadsPerSubgroup / 2 - 2); + gsPrimsPerSubgroup = hasTs || needsLds ? 
192 : NggMaxThreadsPerSubgroup; break; case NggSubgroupSizing::OptimizeForPrims: - esVertsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup; + esVertsPerSubgroup = NggMaxThreadsPerSubgroup; gsPrimsPerSubgroup = 128; break; case NggSubgroupSizing::Explicit: @@ -559,18 +563,18 @@ bool PatchResourceCollect::checkGsOnChipValidity() { break; case NggSubgroupSizing::Auto: if (m_pipelineState->getTargetInfo().getGfxIpVersion().isGfx(10, 1)) { - esVertsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2 - 2; - gsPrimsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2; + esVertsPerSubgroup = NggMaxThreadsPerSubgroup / 2 - 2; + gsPrimsPerSubgroup = NggMaxThreadsPerSubgroup / 2; } else { // Newer hardware performs the decrement on esVertsPerSubgroup for us already. - esVertsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2; - gsPrimsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup / 2; + esVertsPerSubgroup = NggMaxThreadsPerSubgroup / 2; + gsPrimsPerSubgroup = NggMaxThreadsPerSubgroup / 2; } break; case NggSubgroupSizing::MaximumSize: default: - esVertsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup; - gsPrimsPerSubgroup = Gfx9::NggMaxThreadsPerSubgroup; + esVertsPerSubgroup = NggMaxThreadsPerSubgroup; + gsPrimsPerSubgroup = NggMaxThreadsPerSubgroup; break; } @@ -587,19 +591,19 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // from a different thread. Note that maxVertOut does not account for additional amplification due // to GS instancing. gsPrimsPerSubgroup = - std::max(1u, std::min(gsPrimsPerSubgroup, Gfx9::NggMaxThreadsPerSubgroup / (maxVertOut * gsInstanceCount))); + std::max(1u, std::min(gsPrimsPerSubgroup, NggMaxThreadsPerSubgroup / (maxVertOut * gsInstanceCount))); // NOTE: If one input GS primitive generates too many vertices (consider GS instancing) and they couldn't be // within a NGG subgroup, we enable maximum vertex output per GS instance. 
This will set the register field // EN_MAX_VERT_OUT_PER_GS_INSTANCE and turn off vertex reuse, restricting 1 input GS input // primitive per subgroup and create 1 subgroup per GS instance. - if ((maxVertOut * gsInstanceCount) > Gfx9::NggMaxThreadsPerSubgroup) { + if ((maxVertOut * gsInstanceCount) > NggMaxThreadsPerSubgroup) { enableMaxVertOut = true; gsInstanceCount = 1; gsPrimsPerSubgroup = 1; } - esVertsPerSubgroup = std::min(gsPrimsPerSubgroup * inVertsPerPrim, Gfx9::NggMaxThreadsPerSubgroup); + esVertsPerSubgroup = std::min(gsPrimsPerSubgroup * inVertsPerPrim, NggMaxThreadsPerSubgroup); if (hasTs) esVertsPerSubgroup = std::min(esVertsPerSubgroup, OptimalVerticesPerPrimitiveForTess * gsPrimsPerSubgroup); @@ -635,9 +639,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { unsigned expectedEsLdsSize = esVertsPerSubgroup * esGsRingItemSize + esExtraLdsSize; unsigned expectedGsLdsSize = gsPrimsPerSubgroup * gsInstanceCount * gsVsRingItemSize + gsExtraLdsSize; - const unsigned ldsSizeDwordGranularity = - 1u << m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - unsigned ldsSizeDwords = alignTo(expectedEsLdsSize + expectedGsLdsSize, ldsSizeDwordGranularity); + unsigned ldsSizeDwords = expectedEsLdsSize + expectedGsLdsSize; unsigned maxHwGsLdsSizeDwords = m_pipelineState->getTargetInfo().getGpuProperty().gsOnChipMaxLdsSize; maxHwGsLdsSizeDwords -= rayQueryLdsStackSize; // Exclude LDS space used as ray query stack @@ -678,8 +680,8 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // inVertsPerPrim is the minimum number of vertices we must have per subgroup. esVertsPerSubgroup = - std::max(inVertsPerPrim, std::min(static_cast(gsPrimsPerSubgroup * esVertToGsPrimRatio), - Gfx9::NggMaxThreadsPerSubgroup)); + std::max(inVertsPerPrim, + std::min(static_cast(gsPrimsPerSubgroup * esVertToGsPrimRatio), NggMaxThreadsPerSubgroup)); // Low values of esVertsPerSubgroup are illegal. These numbers below come from HW restrictions. 
if (gfxIp.isGfx(10, 3)) @@ -698,7 +700,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // And then recalculate our LDS usage. expectedEsLdsSize = (esVertsPerSubgroup * esGsRingItemSize) + esExtraLdsSize; expectedGsLdsSize = (gsPrimsPerSubgroup * gsInstanceCount * gsVsRingItemSize) + gsExtraLdsSize; - ldsSizeDwords = alignTo(expectedEsLdsSize + expectedGsLdsSize, ldsSizeDwordGranularity); + ldsSizeDwords = expectedEsLdsSize + expectedGsLdsSize; } // Make sure we don't allocate more than what can legally be allocated by a single subgroup on the hardware. @@ -727,9 +729,6 @@ bool PatchResourceCollect::checkGsOnChipValidity() { gsOnChip = true; // In NGG mode, GS is always on-chip since copy shader is not present. } else { - unsigned ldsSizeDwordGranularity = - static_cast(1 << m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift); - // gsPrimsPerSubgroup shouldn't be bigger than wave size. unsigned gsPrimsPerSubgroup = std::min(m_pipelineState->getTargetInfo().getGpuProperty().gsOnChipDefaultPrimsPerSubgroup, @@ -748,7 +747,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // TODO: Confirm no ES-GS extra LDS space used. const unsigned esGsExtraLdsDwords = 0; - const unsigned maxEsVertsPerSubgroup = Gfx9::OnChipGsMaxEsVertsPerSubgroup; + const unsigned maxEsVertsPerSubgroup = OnChipGsMaxEsVertsPerSubgroup; unsigned esMinVertsPerSubgroup = inVertsPerPrim; @@ -756,12 +755,12 @@ bool PatchResourceCollect::checkGsOnChipValidity() { if (useAdjacency) esMinVertsPerSubgroup >>= 1; - unsigned maxGsPrimsPerSubgroup = Gfx9::OnChipGsMaxPrimPerSubgroup; + unsigned maxGsPrimsPerSubgroup = OnChipGsMaxPrimPerSubgroup; // There is a hardware requirement for gsPrimsPerSubgroup * gsInstanceCount to be capped by // OnChipGsMaxPrimPerSubgroup for adjacency primitive or when GS instanceing is used. 
if (useAdjacency || gsInstanceCount > 1) - maxGsPrimsPerSubgroup = (Gfx9::OnChipGsMaxPrimPerSubgroupAdj / gsInstanceCount); + maxGsPrimsPerSubgroup = (OnChipGsMaxPrimPerSubgroupAdj / gsInstanceCount); gsPrimsPerSubgroup = std::min(gsPrimsPerSubgroup, maxGsPrimsPerSubgroup); @@ -769,10 +768,10 @@ bool PatchResourceCollect::checkGsOnChipValidity() { unsigned worstCaseEsVertsPerSubgroup = std::min(esMinVertsPerSubgroup * gsPrimsPerSubgroup * reuseOffMultiplier, maxEsVertsPerSubgroup); - unsigned esGsLdsSize = (esGsRingItemSize * worstCaseEsVertsPerSubgroup); + unsigned esGsLdsSize = esGsRingItemSize * worstCaseEsVertsPerSubgroup; - // Total LDS use per subgroup aligned to the register granularity. - unsigned gsOnChipLdsSize = alignTo(esGsLdsSize + esGsExtraLdsDwords, ldsSizeDwordGranularity); + // Total LDS use per subgroup. + unsigned gsOnChipLdsSize = esGsLdsSize + esGsExtraLdsDwords; // NOTE: If ray query uses LDS stack, the expected max thread count in the group is 64. And we force wave size // to be 64 in order to keep all threads in the same wave. In the future, we could consider to get rid of this @@ -786,7 +785,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // Use the client-specified amount of LDS space per subgroup. If they specified zero, they want us to // choose a reasonable default. The final amount must be 128-dword aligned. // TODO: Accept DefaultLdsSizePerSubgroup from panel setting - unsigned maxLdsSize = Gfx9::DefaultLdsSizePerSubgroup; + unsigned maxLdsSize = DefaultLdsSizePerSubgroup; maxLdsSize -= rayQueryLdsStackSize; // Exclude LDS space used as ray query stack // If total LDS usage is too big, refactor partitions based on ratio of ES-GS item sizes. 
@@ -804,7 +803,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { assert(gsPrimsPerSubgroup > 0); esGsLdsSize = (esGsRingItemSize * worstCaseEsVertsPerSubgroup); - gsOnChipLdsSize = alignTo(esGsLdsSize + esGsExtraLdsDwords, ldsSizeDwordGranularity); + gsOnChipLdsSize = esGsLdsSize + esGsExtraLdsDwords; assert(gsOnChipLdsSize <= maxLdsSize); } @@ -821,8 +820,8 @@ bool PatchResourceCollect::checkGsOnChipValidity() { // Start out with the assumption that our GS prims per subgroup won't change. unsigned onchipGsPrimsPerSubgroup = gsPrimsPerSubgroup; - // Total LDS use per subgroup aligned to the register granularity to keep ESGS and GSVS data on chip. - unsigned onchipEsGsVsLdsSize = alignTo(esGsLdsSize + gsVsLdsSize, ldsSizeDwordGranularity); + // Total LDS use per subgroup to keep ESGS and GSVS data on chip. + unsigned onchipEsGsVsLdsSize = esGsLdsSize + gsVsLdsSize; unsigned onchipEsGsLdsSizeOnchipGsVs = esGsLdsSize; if (onchipEsGsVsLdsSize > maxLdsSize) { @@ -838,8 +837,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { std::min(esMinVertsPerSubgroup * onchipGsPrimsPerSubgroup * reuseOffMultiplier, maxEsVertsPerSubgroup); // Calculate the LDS sizes required to hit this threshold. - onchipEsGsLdsSizeOnchipGsVs = - alignTo(esGsRingItemSize * worstCaseEsVertsPerSubgroup, ldsSizeDwordGranularity); + onchipEsGsLdsSizeOnchipGsVs = esGsRingItemSize * worstCaseEsVertsPerSubgroup; gsVsLdsSize = gsVsItemSize * onchipGsPrimsPerSubgroup; onchipEsGsVsLdsSize = onchipEsGsLdsSizeOnchipGsVs + gsVsLdsSize; @@ -896,10 +894,8 @@ bool PatchResourceCollect::checkGsOnChipValidity() { gsResUsage->inOutUsage.gs.calcFactor.gsVsRingItemSize = gsOnChip ? 
gsVsRingItemSizeOnChip : gsVsRingItemSize; if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 10 && hasTs && !gsOnChip) { - unsigned esVertsNum = Gfx9::EsVertsOffchipGsOrTess; - unsigned onChipGsLdsMagicSize = alignTo( - (esVertsNum * esGsRingItemSize) + esGsExtraLdsDwords, - static_cast((1 << m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift))); + unsigned esVertsNum = EsVertsOffchipGsOrTess; + unsigned onChipGsLdsMagicSize = (esVertsNum * esGsRingItemSize) + esGsExtraLdsDwords; // If the new size is greater than the size we previously set // then we need to either increase the size or decrease the verts @@ -914,7 +910,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { } } // Support multiple GS instances - unsigned gsPrimsNum = Gfx9::GsPrimsOffchipGsOrTess / gsInstanceCount; + unsigned gsPrimsNum = GsPrimsOffchipGsOrTess / gsInstanceCount; // NOTE: If ray query uses LDS stack, the expected max thread count in the group is 64. And we force wave size // to be 64 in order to keep all threads in the same wave. In the future, we could consider to get rid of this @@ -1119,7 +1115,6 @@ bool PatchResourceCollect::isVertexReuseDisabled() { // // @param module : LLVM module void PatchResourceCollect::checkRayQueryLdsStackUsage(Module *module) { - assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); auto ldsStack = module->getNamedGlobal(RayQueryLdsStackName); if (ldsStack) { SmallVector worklist; @@ -1214,6 +1209,12 @@ void PatchResourceCollect::visitCallInst(CallInst &callInst) { m_resUsage->resourceRead = true; if (flags & Builder::BufferFlagWritten) m_resUsage->resourceWrite = true; + } else if (auto *loadStridedBufferDescOp = dyn_cast(&callInst)) { + unsigned flags = loadStridedBufferDescOp->getFlags(); + // Mark the shader as reading and writing (if applicable) a resource. 
+ m_resUsage->resourceRead = true; + if (flags & Builder::BufferFlagWritten) + m_resUsage->resourceWrite = true; } } @@ -2317,36 +2318,62 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { unsigned availPerPrimitiveOutMapLoc = inOutUsage.perPrimitiveOutputMapLocCount; // Map per-vertex built-in outputs to generic ones - if (builtInUsage.mesh.position) - inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; + if (builtInUsage.mesh.position) { + inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {4, BuiltInPosition}; // vec4 + ++availOutMapLoc; + } - if (builtInUsage.mesh.pointSize) - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc++; + if (builtInUsage.mesh.pointSize) { + inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {1, BuiltInPointSize}; // float + ++availOutMapLoc; + } if (builtInUsage.mesh.clipDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc++; + inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {static_cast(builtInUsage.mesh.clipDistance), + BuiltInClipDistance}; // float[] + ++availOutMapLoc; + if (builtInUsage.mesh.clipDistance > 4) ++availOutMapLoc; } if (builtInUsage.mesh.cullDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; + inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {static_cast(builtInUsage.mesh.cullDistance), + BuiltInCullDistance}; // float[] + ++availOutMapLoc; + if (builtInUsage.mesh.cullDistance > 4) ++availOutMapLoc; } // Map per-primitive built-in outputs to generic ones - if (builtInUsage.mesh.primitiveId) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveId] = availPerPrimitiveOutMapLoc++; + if 
(builtInUsage.mesh.primitiveId) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveId] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInPrimitiveId}; // int + ++availPerPrimitiveOutMapLoc; + } - if (builtInUsage.mesh.viewportIndex) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInViewportIndex] = availPerPrimitiveOutMapLoc++; + if (builtInUsage.mesh.viewportIndex) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInViewportIndex] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInViewportIndex}; // int + ++availPerPrimitiveOutMapLoc; + } - if (builtInUsage.mesh.layer) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInLayer] = availPerPrimitiveOutMapLoc++; + if (builtInUsage.mesh.layer) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInLayer] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInLayer}; // int + ++availPerPrimitiveOutMapLoc; + } - if (builtInUsage.mesh.primitiveShadingRate) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveShadingRate] = availPerPrimitiveOutMapLoc++; + if (builtInUsage.mesh.primitiveShadingRate) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveShadingRate] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInPrimitiveShadingRate}; // int + ++availPerPrimitiveOutMapLoc; + } // Map per-vertex built-in outputs to exported locations if (nextStage == ShaderStage::Fragment) { @@ -2357,13 +2384,13 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { if (nextBuiltInUsage.clipDistance > 0) { assert(nextInOutUsage.builtInInputLocMap.find(BuiltInClipDistance) != nextInOutUsage.builtInInputLocMap.end()); const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInClipDistance]; - inOutUsage.mesh.builtInExportLocs[BuiltInClipDistance] = 
mapLoc; + inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance] = mapLoc; } if (nextBuiltInUsage.cullDistance > 0) { assert(nextInOutUsage.builtInInputLocMap.find(BuiltInCullDistance) != nextInOutUsage.builtInInputLocMap.end()); const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInCullDistance]; - inOutUsage.mesh.builtInExportLocs[BuiltInCullDistance] = mapLoc; + inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance] = mapLoc; } } else if (nextStage == ShaderStage::Invalid) { // Mesh shader only @@ -2377,12 +2404,12 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { } if (builtInUsage.mesh.clipDistance > 0) - inOutUsage.mesh.builtInExportLocs[BuiltInClipDistance] = exportLoc; + inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance] = exportLoc; if (builtInUsage.mesh.cullDistance > 0) { if (builtInUsage.mesh.clipDistance >= 4) ++exportLoc; - inOutUsage.mesh.builtInExportLocs[BuiltInCullDistance] = exportLoc; + inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance] = exportLoc; } } } @@ -2397,39 +2424,36 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInPrimitiveId) != nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInPrimitiveId]; - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInPrimitiveId] = mapLoc; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId] = mapLoc; } if (nextBuiltInUsage.layer) { assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInLayer) != nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInLayer]; - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInLayer] = mapLoc; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer] = mapLoc; } if (nextBuiltInUsage.viewportIndex) { 
assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInViewportIndex) != nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInViewportIndex]; - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInViewportIndex] = mapLoc; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex] = mapLoc; } } else if (nextStage == ShaderStage::Invalid) { // Mesh shader only unsigned availPerPrimitiveExportLoc = inOutUsage.perPrimitiveOutputMapLocCount; if (builtInUsage.mesh.primitiveId) - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInPrimitiveId] = availPerPrimitiveExportLoc++; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId] = availPerPrimitiveExportLoc++; if (builtInUsage.mesh.layer) - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInLayer] = availPerPrimitiveExportLoc++; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer] = availPerPrimitiveExportLoc++; if (builtInUsage.mesh.viewportIndex) - inOutUsage.mesh.perPrimitiveBuiltInExportLocs[BuiltInViewportIndex] = availPerPrimitiveExportLoc++; + inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex] = availPerPrimitiveExportLoc++; } - inOutUsage.mesh.genericOutputMapLocCount = inOutUsage.outputMapLocCount; - inOutUsage.mesh.perPrimitiveGenericOutputMapLocCount = inOutUsage.perPrimitiveOutputMapLocCount; - inOutUsage.outputMapLocCount = std::max(inOutUsage.outputMapLocCount, availOutMapLoc); inOutUsage.perPrimitiveOutputMapLocCount = std::max(inOutUsage.perPrimitiveOutputMapLocCount, availPerPrimitiveOutMapLoc); @@ -2987,6 +3011,25 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { if (m_shaderStage == ShaderStage::Geometry) inOutUsage.gs.outLocCount[streamId] = std::max(inOutUsage.gs.outLocCount[streamId], newLocMappedTo + 1); } + + // After location mapping is done, we update the location/components map of mesh shader vertex outputs with new + // locations. 
+ if (m_shaderStage == ShaderStage::Mesh) { + // Make a copy and clear the old map + auto vertexOutputComponents = inOutUsage.mesh.vertexOutputComponents; + inOutUsage.mesh.vertexOutputComponents.clear(); + + // Setup a new map with new locations + for (auto &locInfoPair : outputLocInfoMap) { + const unsigned location = locInfoPair.first.getLocation(); + const unsigned newLocation = locInfoPair.second.getLocation(); + + if (vertexOutputComponents.count(location) == 0) + continue; // Skip if not found + + inOutUsage.mesh.vertexOutputComponents[newLocation] = vertexOutputComponents[location]; + } + } } // @@ -3079,6 +3122,25 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { assert(newLocMappedTo != InvalidValue); locPair.second = newLocMappedTo; } + + // After location mapping is done, we update the location/components map of mesh shader primitive outputs with + // new locations. + if (m_shaderStage == ShaderStage::Mesh) { + // Make a copy and clear the old map + auto primitiveOutputComponents = inOutUsage.mesh.primitiveOutputComponents; + inOutUsage.mesh.primitiveOutputComponents.clear(); + + // Setup a new map with new locations + for (auto &locPair : perPrimitiveOutputLocMap) { + const unsigned location = locPair.first; + const unsigned newLocation = locPair.second; + + if (primitiveOutputComponents.count(location) == 0) + continue; // Skip if not found + + inOutUsage.mesh.primitiveOutputComponents[newLocation] = primitiveOutputComponents[location]; + } + } } m_outputCalls.clear(); diff --git a/lgc/patch/PatchSetupTargetFeatures.cpp b/lgc/patch/PatchSetupTargetFeatures.cpp index aa7e5c31af..01eca8701e 100644 --- a/lgc/patch/PatchSetupTargetFeatures.cpp +++ b/lgc/patch/PatchSetupTargetFeatures.cpp @@ -78,10 +78,17 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { auto shaderStage = lgc::getShaderStage(&*func); - if (!shaderStage) { - errs() << "Invalid shader stage for function " << func->getName() << "\n"; - 
report_fatal_error("Got invalid shader stage when setting up features for function"); + // NOTE: AMDGPU_CS_ChainPreserve is expected to not have shader stage set. +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 465196 + if (func->getCallingConv() != CallingConv::AMDGPU_CS_ChainPreserve) { +#endif + if (!shaderStage.has_value()) { + errs() << "Invalid shader stage for function " << func->getName() << "\n"; + report_fatal_error("Got invalid shader stage when setting up features for function"); + } +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 465196 } +#endif if (isShaderEntryPoint(&*func)) { bool useSiScheduler = m_pipelineState->getShaderOptions(shaderStage.value()).useSiScheduler; @@ -93,7 +100,8 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { } } - if (func->getCallingConv() == CallingConv::AMDGPU_GS) { + auto callingConv = func->getCallingConv(); + if (callingConv == CallingConv::AMDGPU_GS) { // NOTE: For NGG primitive shader, enable 128-bit LDS load/store operations to optimize gvec4 data // read/write. This usage must enable the feature of using CI+ additional instructions. 
const auto nggControl = m_pipelineState->getNggControl(); @@ -101,11 +109,16 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { targetFeatures += ",+ci-insts,+enable-ds128"; } - if (func->getCallingConv() == CallingConv::AMDGPU_HS) { + if (callingConv == CallingConv::AMDGPU_HS) { // Force s_barrier to be present (ignore optimization) builder.addAttribute("amdgpu-flat-work-group-size", "128,128"); } - if (func->getCallingConv() == CallingConv::AMDGPU_CS || func->getCallingConv() == CallingConv::AMDGPU_Gfx) { + + if (callingConv == CallingConv::AMDGPU_CS || callingConv == CallingConv::AMDGPU_Gfx +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 465196 + || callingConv == CallingConv::AMDGPU_CS_Chain +#endif + ) { // Set the work group size const auto &computeMode = m_pipelineState->getShaderModes()->getComputeShaderMode(); unsigned flatWorkGroupSize = computeMode.workgroupSizeX * computeMode.workgroupSizeY * computeMode.workgroupSizeZ; @@ -113,7 +126,7 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { builder.addAttribute("amdgpu-flat-work-group-size", (Twine(flatWorkGroupSize) + "," + Twine(flatWorkGroupSize)).toStringRef(attributeBuf)); } - if (func->getCallingConv() == CallingConv::AMDGPU_CS) { + if (callingConv == CallingConv::AMDGPU_CS) { // Tag the position of MultiDispatchInfo argument, so the backend knows which // sgpr needs to be preloaded for COMPUTE_PGM_RSRC2.tg_size_en (Work-Group Info). // This is needed for LDS spilling. @@ -126,13 +139,13 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { auto gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); - if (gfxIp.major >= 10) { - // NOTE: The sub-attribute 'wavefrontsize' of 'target-features' is set in advance to let optimization - // pass know we are in which wavesize mode. Here, we read back it and append it to finalized target - // feature strings. 
- if (func->hasFnAttribute("target-features")) - targetFeatures += func->getFnAttribute("target-features").getValueAsString(); + // NOTE: The sub-attribute 'wavefrontsize' of 'target-features' is set in advance to let optimization + // pass know we are in which wavesize mode. Here, we read back it and append it to finalized target + // feature strings. + if (func->hasFnAttribute("target-features")) + targetFeatures += func->getFnAttribute("target-features").getValueAsString(); + if (shaderStage.has_value()) { if (m_pipelineState->getShaderWgpMode(shaderStage.value())) targetFeatures += ",-cumode"; else @@ -156,7 +169,7 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { // In the backend, f32 denormals are handled by default, so request denormal flushing behavior. builder.addAttribute("denormal-fp-math-f32", "preserve-sign"); - if (shaderStage != ShaderStage::CopyShader) { + if (shaderStage.has_value() && shaderStage != ShaderStage::CopyShader) { const auto &shaderMode = m_pipelineState->getShaderModes()->getCommonShaderMode(shaderStage.value()); if (shaderMode.fp16DenormMode == FpDenormMode::FlushNone || shaderMode.fp16DenormMode == FpDenormMode::FlushIn || shaderMode.fp64DenormMode == FpDenormMode::FlushNone || shaderMode.fp64DenormMode == FpDenormMode::FlushIn) { diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index 35e5dc7124..6042c4313d 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -29,7 +29,7 @@ *********************************************************************************************************************** */ #include "RegisterMetadataBuilder.h" -#include "Gfx9Chip.h" +#include "lgc/state/AbiMetadata.h" #include "lgc/state/PalMetadata.h" #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" @@ -40,77 +40,33 @@ using namespace llvm; namespace lgc { -namespace Gfx9 { +// Preferred number of GS threads per VS thread. 
+constexpr unsigned GsThreadsPerVsThread = 2; -#include "chip/gfx9/gfx9_plus_merged_enum.h" +// Preferred number of GS threads per subgroup. +constexpr unsigned MaxGsThreadsPerSubgroup = 256; -using namespace Pal::Gfx9::Chip; +// The register headers don't specify an enum for the values of VGT_GS_MODE.ONCHIP. +enum VGT_GS_MODE_ONCHIP_TYPE : unsigned { + VGT_GS_MODE_ONCHIP_OFF = 1, + VGT_GS_MODE_ONCHIP_ON = 3, +}; // ===================================================================================================================== // Builds PAL metadata for pipeline. void RegisterMetadataBuilder::buildPalMetadata() { if (m_pipelineState->isGraphics()) { - const bool hasTs = (m_hasTcs || m_hasTes); - m_isNggMode = false; - if (m_gfxIp.major >= 11) - m_isNggMode = true; - else if (m_gfxIp.major == 10) - m_isNggMode = m_pipelineState->getNggControl()->enableNgg; - - Util::Abi::PipelineType pipelineType = Util::Abi::PipelineType::VsPs; - auto lastVertexProcessingStage = m_pipelineState->getLastVertexProcessingStage(); - - DenseMap apiHwShaderMap; - if (m_hasTask || m_hasMesh) { - assert(m_pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3})); - if (m_hasMesh) { - apiHwShaderMap[ShaderStage::Mesh] = Util::Abi::HwShaderGs; - pipelineType = Util::Abi::PipelineType::Mesh; - } - if (m_hasTask) { - apiHwShaderMap[ShaderStage::Task] = Util::Abi::HwShaderCs; - pipelineType = Util::Abi::PipelineType::TaskMesh; - } - } else { - if (m_hasGs) { - auto preGsStage = m_pipelineState->getPrevShaderStage(ShaderStage::Geometry); - if (preGsStage != ShaderStage::Invalid) - apiHwShaderMap[preGsStage] = Util::Abi::HwShaderGs; - } - if (m_hasTcs) { - apiHwShaderMap[ShaderStage::TessControl] = Util::Abi::HwShaderHs; - if (m_hasVs) - apiHwShaderMap[ShaderStage::Vertex] = Util::Abi::HwShaderHs; - } + m_isNggMode = m_pipelineState->isNggEnabled(); - if (lastVertexProcessingStage != ShaderStage::Invalid) { - if (lastVertexProcessingStage == ShaderStage::CopyShader) - 
lastVertexProcessingStage = ShaderStage::Geometry; - if (m_isNggMode) { - apiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderGs; - pipelineType = hasTs ? Util::Abi::PipelineType::NggTess : Util::Abi::PipelineType::Ngg; - } else { - apiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderVs; - if (m_hasGs) - apiHwShaderMap[lastVertexProcessingStage] |= Util::Abi::HwShaderGs; - - if (hasTs && m_hasGs) - pipelineType = Util::Abi::PipelineType::GsTess; - else if (hasTs) - pipelineType = Util::Abi::PipelineType::Tess; - else if (m_hasGs) - pipelineType = Util::Abi::PipelineType::Gs; - else - pipelineType = Util::Abi::PipelineType::VsPs; - } - } - } - if (m_pipelineState->hasShaderStage(ShaderStage::Fragment)) - apiHwShaderMap[ShaderStage::Fragment] = Util::Abi::HwShaderPs; + auto abiHwShaderMap = m_pipelineState->getAbiHwShaderMap(); + auto pipelineType = static_cast(m_pipelineState->getAbiPipelineType()); + auto lastVertexProcessingStage = m_pipelineState->getLastVertexProcessingStage(); + if (lastVertexProcessingStage == ShaderStage::CopyShader) + lastVertexProcessingStage = ShaderStage::Geometry; // Set the mapping between api shader stage and hardware stage unsigned hwStageMask = 0; - for (const auto &entry : apiHwShaderMap) { + for (const auto &entry : *abiHwShaderMap) { const auto apiStage = static_cast(entry.first); hwStageMask |= entry.second; addApiHwShaderMapping(apiStage, entry.second); @@ -404,7 +360,7 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { // ===================================================================================================================== // Builds register configuration for hardware primitive shader. 
void RegisterMetadataBuilder::buildPrimShaderRegisters() { - assert(m_gfxIp.major >= 10 || (m_hasMesh && m_gfxIp >= GfxIpVersion({10, 3}))); + assert(!m_hasMesh || (m_hasMesh && m_gfxIp >= GfxIpVersion({10, 3}))); const auto vsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Vertex); const auto &vsBuiltInUsage = vsResUsage->builtInUsage.vs; const auto tesResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); @@ -814,9 +770,7 @@ void RegisterMetadataBuilder::buildPsRegisters() { fragmentMode.earlyFragmentTests && resUsage->resourceWrite; dbShaderControl[Util::Abi::DbShaderControlMetadataKey::ExecOnHierFail] = execOnHeirFail; dbShaderControl[Util::Abi::DbShaderControlMetadataKey::ConservativeZExport] = conservativeZExport; - if (m_gfxIp.major >= 10) - dbShaderControl[Util::Abi::DbShaderControlMetadataKey::PreShaderDepthCoverageEnable] = - fragmentMode.postDepthCoverage; + dbShaderControl[Util::Abi::DbShaderControlMetadataKey::PreShaderDepthCoverageEnable] = fragmentMode.postDepthCoverage; // SPI_PS_INPUT_CNTL_0..31 // NOTE: PAL expects at least one mmSPI_PS_INPUT_CNTL_0 register set, so we always patch it at least one if none @@ -867,7 +821,7 @@ void RegisterMetadataBuilder::buildPsRegisters() { // PRIM_EXPORT_COUNT. When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even // though this is not what we want. Hence, we should reserve param0 as a dummy vertex attribute and all // primitive attributes are moved after it. - bool hasNoVertexAttrib = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.expCount == 0; + bool hasNoVertexAttrib = resUsage->inOutUsage.inputMapLocCount == 0; if (hasNoVertexAttrib) ++spiPsInputCntlInfo.offset; spiPsInputCntlInfo.primAttr = true; @@ -1044,10 +998,8 @@ void RegisterMetadataBuilder::buildShaderExecutionRegisters(Util::Abi::HardwareS auto hwShaderNode = getHwShaderNode(hwStage); ShaderStageEnum apiStage = apiStage2 != ShaderStage::Invalid ? 
apiStage2 : apiStage1; - if (m_isNggMode || m_gfxIp.major >= 10) { - const unsigned waveSize = m_pipelineState->getShaderWaveSize(apiStage); - hwShaderNode[Util::Abi::HardwareStageMetadataKey::WavefrontSize] = waveSize; - } + const unsigned waveSize = m_pipelineState->getShaderWaveSize(apiStage); + hwShaderNode[Util::Abi::HardwareStageMetadataKey::WavefrontSize] = waveSize; unsigned checksum = 0; if (apiStage1 != ShaderStage::Invalid && apiStage1 != ShaderStage::CopyShader) @@ -1083,16 +1035,14 @@ void RegisterMetadataBuilder::buildShaderExecutionRegisters(Util::Abi::HardwareS } hwShaderNode[Util::Abi::HardwareStageMetadataKey::UserSgprs] = userDataCount; - if (m_gfxIp.major >= 10) { - hwShaderNode[Util::Abi::HardwareStageMetadataKey::MemOrdered] = true; - if (hwStage == Util::Abi::HardwareStage::Hs || hwStage == Util::Abi::HardwareStage::Gs) { - bool wgpMode = false; - if (apiStage1 != ShaderStage::Invalid) - wgpMode = m_pipelineState->getShaderWgpMode(apiStage1); - if (apiStage2 != ShaderStage::Invalid) - wgpMode = wgpMode || m_pipelineState->getShaderWgpMode(apiStage2); - hwShaderNode[Util::Abi::HardwareStageMetadataKey::WgpMode] = wgpMode; - } + hwShaderNode[Util::Abi::HardwareStageMetadataKey::MemOrdered] = true; + if (hwStage == Util::Abi::HardwareStage::Hs || hwStage == Util::Abi::HardwareStage::Gs) { + bool wgpMode = false; + if (apiStage1 != ShaderStage::Invalid) + wgpMode = m_pipelineState->getShaderWgpMode(apiStage1); + if (apiStage2 != ShaderStage::Invalid) + wgpMode = wgpMode || m_pipelineState->getShaderWgpMode(apiStage2); + hwShaderNode[Util::Abi::HardwareStageMetadataKey::WgpMode] = wgpMode; } hwShaderNode[Util::Abi::HardwareStageMetadataKey::SgprLimit] = sgprLimits; @@ -1117,6 +1067,12 @@ void RegisterMetadataBuilder::buildShaderExecutionRegisters(Util::Abi::HardwareS userDataLimit = value + 1; } m_pipelineState->getPalMetadata()->setUserDataLimit(userDataLimit); + + // Fill ".spill_threshold" + unsigned spillThreshold = 
m_pipelineState->getSpillThreshold(apiStage); + if (spillThreshold != USHRT_MAX) { + hwShaderNode[Util::Abi::HardwareStageMetadataKey::ShaderSpillThreshold] = spillThreshold; + } } // ===================================================================================================================== @@ -1352,7 +1308,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { // SPI_SHADER_POS_FORMAT unsigned availPosCount = 1; // gl_Position is always exported - unsigned posCount = m_gfxIp.major >= 10 ? 5 : 4; + unsigned posCount = 5; if (miscExport) ++availPosCount; @@ -1553,13 +1509,7 @@ void RegisterMetadataBuilder::setVgtTfParam() { // // @param onChipLdsSize : The value of onChip LDS size unsigned RegisterMetadataBuilder::calcLdsSize(unsigned ldsSizeInDwords) { - const unsigned ldsSizeDwordGranularityShift = - m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift; - const unsigned ldsSizeDwordGranularity = 1u << ldsSizeDwordGranularityShift; - ldsSizeInDwords = alignTo(ldsSizeInDwords, ldsSizeDwordGranularity); return (ldsSizeInDwords * 4); } -} // namespace Gfx9 - } // namespace lgc diff --git a/lgc/patch/RegisterMetadataBuilder.h b/lgc/patch/RegisterMetadataBuilder.h index b835e22837..3683aabd00 100644 --- a/lgc/patch/RegisterMetadataBuilder.h +++ b/lgc/patch/RegisterMetadataBuilder.h @@ -36,7 +36,6 @@ namespace lgc { -namespace Gfx9 { // ===================================================================================================================== // Represents the builder to generate register configurations for GFX11 plus chips. 
class RegisterMetadataBuilder : public ConfigBuilderBase { @@ -66,5 +65,4 @@ class RegisterMetadataBuilder : public ConfigBuilderBase { bool m_isNggMode = false; }; -} // namespace Gfx9 } // namespace lgc diff --git a/lgc/patch/ShaderMerger.cpp b/lgc/patch/ShaderMerger.cpp index 3633aff185..4b6a4b8121 100644 --- a/lgc/patch/ShaderMerger.cpp +++ b/lgc/patch/ShaderMerger.cpp @@ -56,7 +56,6 @@ using namespace lgc; ShaderMerger::ShaderMerger(PipelineState *pipelineState, PipelineShadersResult *pipelineShaders) : m_pipelineState(pipelineState), m_context(&pipelineState->getContext()), m_gfxIp(pipelineState->getTargetInfo().getGfxIpVersion()) { - assert(m_gfxIp.major >= 10); assert(m_pipelineState->isGraphics()); m_hasVs = m_pipelineState->hasShaderStage(ShaderStage::Vertex); @@ -93,8 +92,6 @@ unsigned ShaderMerger::getSpecialSgprInputIndex(GfxIpVersion gfxIp, LsHs::Specia {LsHs::waveIdInGroup, 5}, // s5 }; - assert(gfxIp.major >= 10); // Must be GFX10+ - if (gfxIp.major >= 11) { assert(LsHsSpecialSgprInputMapGfx11.count(sgprInput) > 0); return LsHsSpecialSgprInputMapGfx11.at(sgprInput); @@ -145,18 +142,14 @@ unsigned ShaderMerger::getSpecialSgprInputIndex(GfxIpVersion gfxIp, EsGs::Specia {EsGs::FlatScratchHigh, 7}, // s7 }; - assert(gfxIp.major >= 10); // Must be GFX10+ - if (gfxIp.major >= 11) { assert(EsGsSpecialSgprInputMapGfx11.count(sgprInput) > 0); return EsGsSpecialSgprInputMapGfx11.at(sgprInput); } - if (gfxIp.major >= 10) { - if (useNgg) { - assert(EsGsSpecialSgprInputMapGfx10.count(sgprInput) > 0); - return EsGsSpecialSgprInputMapGfx10.at(sgprInput); - } + if (useNgg) { + assert(EsGsSpecialSgprInputMapGfx10.count(sgprInput) > 0); + return EsGsSpecialSgprInputMapGfx10.at(sgprInput); } assert(EsGsSpecialSgprInputMapGfx9.count(sgprInput) > 0); @@ -312,6 +305,8 @@ Function *ShaderMerger::generateLsHsEntryPoint(Function *lsEntryPoint, Function Function *entryPoint = createFunctionHelper(entryPointTy, GlobalValue::ExternalLinkage, hsEntryPoint->getParent(), 
lgcName::LsHsEntryPoint); entryPoint->setDLLStorageClass(GlobalValue::DLLExportStorageClass); + setShaderStage(entryPoint, ShaderStage::TessControl); + auto module = hsEntryPoint->getParent(); module->getFunctionList().push_front(entryPoint); @@ -322,8 +317,7 @@ Function *ShaderMerger::generateLsHsEntryPoint(Function *lsEntryPoint, Function entryPoint->addFnAttr("amdgpu-flat-work-group-size", "128,128"); // Force s_barrier to be present (ignore optimization) const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::TessControl); - if (m_gfxIp.major >= 10) - entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size + entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size applyTuningAttributes(entryPoint, tuningAttrs); for (auto &arg : entryPoint->args()) { @@ -651,8 +645,7 @@ Function *ShaderMerger::generateEsGsEntryPoint(Function *esEntryPoint, Function entryPoint->addFnAttr("amdgpu-flat-work-group-size", "128,128"); // Force s_barrier to be present (ignore optimization) const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - if (m_gfxIp.major >= 10) - entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size + entryPoint->addFnAttr("target-features", ",+wavefrontsize" + std::to_string(waveSize)); // Set wavefront size applyTuningAttributes(entryPoint, tuningAttrs); for (auto &arg : entryPoint->args()) { @@ -955,43 +948,29 @@ void ShaderMerger::appendArguments(SmallVectorImpl &args, ArrayRef= 10); - - Module *module = nullptr; - if (entryPoint1) - module = entryPoint1->getParent(); - else if (entryPoint2) - module = entryPoint2->getParent(); - assert(module); + Function *entryPoint = entryPoint2 ? 
entryPoint2 : entryPoint1; + assert(entryPoint); + Module *module = entryPoint->getParent(); auto ldsStack = module->getNamedGlobal(RayQueryLdsStackName); if (ldsStack) { - unsigned ldsStackBase = 0; - - std::optional shaderStage2; - if (entryPoint2) - shaderStage2 = lgc::getShaderStage(entryPoint2); + std::optional shaderStage = lgc::getShaderStage(entryPoint); + bool hasLdsStack = false; - if (shaderStage2 == ShaderStage::TessControl) { + if (shaderStage == ShaderStage::TessControl) { // Must be LS-HS merged shader const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->inOutUsage.tcs.calcFactor; - if (calcFactor.rayQueryLdsStackSize > 0) - ldsStackBase = calcFactor.tessOnChipLdsSize; + hasLdsStack = calcFactor.rayQueryLdsStackSize > 0; } else { // Must be ES-GS merged shader or NGG primitive shader const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; - if (calcFactor.rayQueryLdsStackSize > 0) - ldsStackBase = calcFactor.gsOnChipLdsSize; + hasLdsStack = calcFactor.rayQueryLdsStackSize > 0; } - if (ldsStackBase > 0) { - auto lds = Patch::getLdsVariable(m_pipelineState, module); - auto newLdsStack = ConstantExpr::getGetElementPtr( - lds->getValueType(), lds, - ArrayRef({ConstantInt::get(Type::getInt32Ty(*m_context), 0), - ConstantInt::get(Type::getInt32Ty(*m_context), ldsStackBase)})); - newLdsStack = ConstantExpr::getBitCast(newLdsStack, ldsStack->getType()); + if (hasLdsStack) { + auto lds = Patch::getLdsVariable(m_pipelineState, entryPoint, /*rtStack=*/true); + auto newLdsStack = ConstantExpr::getBitCast(lds, ldsStack->getType()); SmallVector ldsStackInsts; for (auto user : ldsStack->users()) { @@ -1336,7 +1315,7 @@ void ShaderMerger::storeTessFactorsWithOpt(Value *threadIdInWave, IRBuilder<> &b builder.CreateIntToPtr(globalTablePtr, PointerType::get(tfBufferDescTy, ADDR_SPACE_CONST), "globalTablePtr"); Value *tfBufferDescPtr = - builder.CreateGEP(tfBufferDescTy, 
globalTablePtr, builder.getInt32(SiDrvTableTfBufferOffs), "tfBufferDescPtr"); + builder.CreateConstGEP1_32(builder.getInt8Ty(), globalTablePtr, SiDrvTableTfBufferOffs * 4, "tfBufferDescPtr"); auto tfBufferDesc = builder.CreateLoad(tfBufferDescTy, tfBufferDescPtr, "tfBufferDesc"); Value *tfBufferBase = getFunctionArgument(entryPoint, getSpecialSgprInputIndex(m_gfxIp, LsHs::TfBufferBase)); @@ -1363,8 +1342,8 @@ void ShaderMerger::storeTessFactorsWithOpt(Value *threadIdInWave, IRBuilder<> &b Value *ShaderMerger::readValueFromLds(Type *readTy, Value *ldsOffset, IRBuilder<> &builder) { assert(readTy->getScalarSizeInBits() == 32); // Only accept 32-bit data - auto lds = Patch::getLdsVariable(m_pipelineState, builder.GetInsertBlock()->getModule()); - Value *readPtr = builder.CreateGEP(lds->getValueType(), lds, {builder.getInt32(0), ldsOffset}); + auto lds = Patch::getLdsVariable(m_pipelineState, builder.GetInsertBlock()->getParent()); + Value *readPtr = builder.CreateGEP(builder.getInt32Ty(), lds, ldsOffset); readPtr = builder.CreateBitCast(readPtr, PointerType::get(readTy, readPtr->getType()->getPointerAddressSpace())); return builder.CreateAlignedLoad(readTy, readPtr, Align(4)); } @@ -1379,8 +1358,8 @@ void ShaderMerger::writeValueToLds(Value *writeValue, Value *ldsOffset, IRBuilde auto writeTy = writeValue->getType(); assert(writeTy->getScalarSizeInBits() == 32); // Only accept 32-bit data - auto lds = Patch::getLdsVariable(m_pipelineState, builder.GetInsertBlock()->getModule()); - Value *writePtr = builder.CreateGEP(lds->getValueType(), lds, {builder.getInt32(0), ldsOffset}); + auto lds = Patch::getLdsVariable(m_pipelineState, builder.GetInsertBlock()->getParent()); + Value *writePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ldsOffset); writePtr = builder.CreateBitCast(writePtr, PointerType::get(writeTy, writePtr->getType()->getPointerAddressSpace())); builder.CreateAlignedStore(writeValue, writePtr, Align(4)); } diff --git a/lgc/patch/SystemValues.cpp 
b/lgc/patch/SystemValues.cpp index 7f4fda56c4..0deecd5e06 100644 --- a/lgc/patch/SystemValues.cpp +++ b/lgc/patch/SystemValues.cpp @@ -267,7 +267,7 @@ Value *ShaderSystemValues::getGsVsRingBufDesc(unsigned streamId) { const auto resUsage = m_pipelineState->getShaderResourceUsage(m_shaderStage); // Geometry shader, using GS-VS ring for output. - Value *desc = loadDescFromDriverTable(SiDrvTableGsRingOuT0Offs + streamId, builder); + Value *desc = loadDescFromDriverTable(SiDrvTableGsRingOuT0Offs + streamId * 4, builder); unsigned outLocStart = 0; for (int i = 0; i < streamId; ++i) @@ -496,13 +496,12 @@ Instruction *ShaderSystemValues::makePointer(Value *lowValue, Type *ptrTy, unsig // getInternalGlobalTablePtr(). Otherwise there is a danger that code is inserted in the wrong order, giving // invalid IR. // -// @param tableOffset : Byte offset in driver table +// @param tableOffset : DWORD offset in driver table // @param builder : Builder to use for insertion Instruction *ShaderSystemValues::loadDescFromDriverTable(unsigned tableOffset, BuilderBase &builder) { auto globalTable = getInternalGlobalTablePtr(); + Value *descPtr = builder.CreateConstGEP1_32(builder.getInt8Ty(), globalTable, tableOffset * 4); Type *descTy = FixedVectorType::get(builder.getInt32Ty(), 4); - globalTable = cast(builder.CreateBitCast(globalTable, descTy->getPointerTo(ADDR_SPACE_CONST))); - Value *descPtr = builder.CreateGEP(descTy, globalTable, builder.getInt32(tableOffset)); LoadInst *desc = builder.CreateLoad(descTy, descPtr); return desc; } diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index 7ef14969a8..5587c8f52e 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -1225,14 +1225,12 @@ Value *VertexFetchImpl::fetchVertex(Type *inputTy, const VertexInputDescription // Input components' type Type *inputCompTy = inputTy->isVectorTy() ? 
cast(inputTy)->getElementType() : inputTy; - unsigned inputCompBytes = std::max(inputCompTy->getScalarSizeInBits() / 8, compFormatInfo->compByteSize); + unsigned inputCompBytes = inputCompTy->getScalarSizeInBits() / 8; // Location size of components. If its type is Double, each component consumes 2 locations. - const unsigned compLocationSize = (inputCompBytes + 3) / 4; - compIdx *= compLocationSize; - // For Double type, we still do 32 bit fetch. - inputCompBytes /= compLocationSize; + const unsigned compLocationSize = (std::max(inputCompBytes, compFormatInfo->compByteSize) + 3) / 4; + compIdx *= compLocationSize; // Whether it is fetching with a packed format. bool isPacked = (compFormatInfo->compByteSize == 0); @@ -1460,7 +1458,7 @@ Value *VertexFetchImpl::loadVertexBufferDescriptor(unsigned binding, BuilderImpl auto descPtr = builderImpl.CreateBufferDesc(InternalDescriptorSetId, CurrentAttributeBufferBinding, builderImpl.getInt32(0), lgc::Builder::BufferFlagAddress); // Create descriptor by a 64-bits pointer - m_curAttribBufferDescr = builderImpl.buildInlineBufferDesc(descPtr); + m_curAttribBufferDescr = builderImpl.buildInlineBufferDesc(descPtr, 0); } vtxDesc = m_curAttribBufferDescr; } else { @@ -1618,12 +1616,11 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *s unsigned fetchCompBytes, bool isSigned, bool isPacked, bool fetchInByte, BuilderImpl &builderImpl, Value **ppFetch) const { Intrinsic::ID instId = Intrinsic::amdgcn_struct_tbuffer_load; - Value *instOffset = builderImpl.getInt32(offset); + Value *instOffset = builderImpl.getInt32(0); if (m_useSoftwareVertexBufferDescriptors) { // Generated offset delta will always be aligned. 
instId = Intrinsic::amdgcn_raw_tbuffer_load; - auto index2Offset = builderImpl.CreateMul(vbIndex, srdStride); - instOffset = builderImpl.CreateAdd(index2Offset, instOffset); + instOffset = builderImpl.CreateMul(vbIndex, srdStride); } // For tbuffer_load, only support two types (could be vector) of fetch : d16 or i32, depending on input type. diff --git a/lgc/state/PalMetadata.cpp b/lgc/state/PalMetadata.cpp index 3f467bf640..6fc0f499e3 100644 --- a/lgc/state/PalMetadata.cpp +++ b/lgc/state/PalMetadata.cpp @@ -60,18 +60,6 @@ struct KeyValuePair { // A type used to represent a map that is stored as a constant array. using ArrayMap = KeyValuePair[]; -// ===================================================================================================================== -// Returns the value for the given key in the map. It assumes that the key will be found. -// -// @param map : The map to search. -// @param key : The key to be searched for. -unsigned findValueInArrayMap(ArrayRef map, unsigned key) { - auto entryHasGivenKey = [key](const KeyValuePair &keyValuePair) { return keyValuePair.key == key; }; - auto keyValuePair = std::find_if(map.begin(), map.end(), entryHasGivenKey); - assert(keyValuePair != map.end() && "Could not find key in the array map."); - return keyValuePair->value; -} - // ===================================================================================================================== // Returns an ArrayDocNode associated with the given document that contains the given data. 
// @@ -89,9 +77,7 @@ msgpack::ArrayDocNode buildArrayDocNode(msgpack::Document *document, Hash128 has // Construct empty object // // @param pipelineState : PipelineState -// @param useRegisterFieldFormat: The control of new PAL metadata or not -PalMetadata::PalMetadata(PipelineState *pipelineState, bool useRegisterFieldFormat) - : m_pipelineState(pipelineState), m_useRegisterFieldFormat(useRegisterFieldFormat) { +PalMetadata::PalMetadata(PipelineState *pipelineState) : m_pipelineState(pipelineState) { m_document = new msgpack::Document; initialize(); } @@ -101,9 +87,7 @@ PalMetadata::PalMetadata(PipelineState *pipelineState, bool useRegisterFieldForm // // @param pipelineState : PipelineState // @param blob : MsgPack PAL metadata -// @param useRegisterFieldFormat: The control of using new PAL metadata or not -PalMetadata::PalMetadata(PipelineState *pipelineState, StringRef blob, bool useRegisterFieldFormat) - : m_pipelineState(pipelineState), m_useRegisterFieldFormat(useRegisterFieldFormat) { +PalMetadata::PalMetadata(PipelineState *pipelineState, StringRef blob) : m_pipelineState(pipelineState) { m_document = new msgpack::Document; bool success = m_document->readFromBlob(blob, /*multi=*/false); assert(success && "Bad PAL metadata format"); @@ -116,9 +100,7 @@ PalMetadata::PalMetadata(PipelineState *pipelineState, StringRef blob, bool useR // // @param pipelineState : PipelineState // @param module : Pipeline IR module -// @param useRegisterFieldFormat: The control of using new PAL metadata or not -PalMetadata::PalMetadata(PipelineState *pipelineState, Module *module, bool useRegisterFieldFormat) - : m_pipelineState(pipelineState), m_useRegisterFieldFormat(useRegisterFieldFormat) { +PalMetadata::PalMetadata(PipelineState *pipelineState, Module *module) : m_pipelineState(pipelineState) { m_document = new msgpack::Document; NamedMDNode *namedMd = module->getNamedMetadata(PalMetadataName); if (namedMd && namedMd->getNumOperands()) { @@ -147,14 +129,15 @@ void 
PalMetadata::initialize() { // Pre-find (or create) heavily used nodes. m_pipelineNode = m_document->getRoot().getMap(true)[Util::Abi::PalCodeObjectMetadataKey::Pipelines].getArray(true)[0].getMap(true); - if (!m_useRegisterFieldFormat) - m_registers = m_pipelineNode[".registers"].getMap(true); m_userDataLimit = &m_pipelineNode[Util::Abi::PipelineMetadataKey::UserDataLimit]; if (m_userDataLimit->isEmpty()) *m_userDataLimit = 0U; m_spillThreshold = &m_pipelineNode[Util::Abi::PipelineMetadataKey::SpillThreshold]; if (m_spillThreshold->isEmpty()) *m_spillThreshold = MAX_SPILL_THRESHOLD; + for (unsigned stage = ShaderStage::Task; stage < ShaderStage::CountInternal; ++stage) { + m_pipelineState->setSpillThreshold(static_cast(stage), MAX_SPILL_THRESHOLD); + } } // ===================================================================================================================== @@ -165,13 +148,8 @@ void PalMetadata::initialize() { void PalMetadata::record(Module *module) { // Add the metadata version number. auto versionNode = m_document->getRoot().getMap(true)[Util::Abi::PalCodeObjectMetadataKey::Version].getArray(true); - if (m_useRegisterFieldFormat) { - versionNode[0] = Util::Abi::PipelineMetadataMajorVersionNew; - versionNode[1] = Util::Abi::PipelineMetadataMinorVersionNew; - } else { - versionNode[0] = Util::Abi::PipelineMetadataMajorVersion; - versionNode[1] = Util::Abi::PipelineMetadataMinorVersion; - } + versionNode[0] = Util::Abi::PipelineMetadataMajorVersionNew; + versionNode[1] = Util::Abi::PipelineMetadataMinorVersionNew; // Write the MsgPack document into an IR metadata node. // The IR named metadata node contains an MDTuple containing an MDString containing the msgpack data. @@ -202,100 +180,6 @@ void PalMetadata::mergeFromBlob(llvm::StringRef blob, bool isGlueCode) { // 0: success; *dest has been set up with the merged node. For an array, 0 means overwrite the existing array // rather than appending. 
auto merger = [isGlueCode](msgpack::DocNode *destNode, msgpack::DocNode srcNode, msgpack::DocNode mapKey) { - // Allow array and map merging. - if (srcNode.isMap() && destNode->isMap()) - return 0; - if (srcNode.isArray() && destNode->isArray()) - return 0; - // Allow string merging as long as the two strings have the same value. If one string has a "_fetchless" - // suffix, take the other one. This is for the benefit of the linker linking a fetch shader with a - // fetchless VS. - if (destNode->isString() && srcNode.isString()) { - if (destNode->getString() == srcNode.getString()) - return 0; - if (srcNode.getString().ends_with("_fetchless")) - return 0; - if (destNode->getString().ends_with("_fetchless")) { - *destNode = srcNode; - return 0; - } - if (srcNode.getString() == "color_export_shader") - return 0; - if (destNode->getString() == "color_export_shader") { - *destNode = srcNode; - return 0; - } - } - // Allow bool merging (for things like .uses_viewport_array_index). - if (destNode->getKind() == msgpack::Type::Boolean && srcNode.getKind() == msgpack::Type::Boolean) { - if (srcNode.getBool()) - *destNode = srcNode.getDocument()->getNode(true); - return 0; - } - // Disallow merging other than uint. - if (destNode->getKind() != msgpack::Type::UInt || srcNode.getKind() != msgpack::Type::UInt) - return -1; - // Special cases of uint merging. - if (mapKey.getKind() == msgpack::Type::UInt) { - switch (mapKey.getUInt()) { - case mmVGT_SHADER_STAGES_EN: - // Ignore new value of VGT_SHADER_STAGES_EN from glue shader, as it might accidentally make the VS - // wave32. (This relies on the glue shader's PAL metadata being merged into the vertex-processing - // part-pipeline, rather than the other way round.) - if (isGlueCode) - return 0; - break; // Use "default behavior for uint nodes" code below. 
- case mmSPI_SHADER_PGM_RSRC1_LS: - case mmSPI_SHADER_PGM_RSRC1_HS: - case mmSPI_SHADER_PGM_RSRC1_ES: - case mmSPI_SHADER_PGM_RSRC1_GS: - case mmSPI_SHADER_PGM_RSRC1_VS: - case mmSPI_SHADER_PGM_RSRC1_PS: { - // For the RSRC1 registers, we need to consider the VGPRs and SGPRs fields separately, and max them. - // This happens when linking in a glue shader. - SPI_SHADER_PGM_RSRC1 destRsrc1; - SPI_SHADER_PGM_RSRC1 srcRsrc1; - SPI_SHADER_PGM_RSRC1 origRsrc1; - origRsrc1.u32All = destNode->getUInt(); - srcRsrc1.u32All = srcNode.getUInt(); - destRsrc1.u32All = origRsrc1.u32All | srcRsrc1.u32All; - destRsrc1.bits.VGPRS = std::max(origRsrc1.bits.VGPRS, srcRsrc1.bits.VGPRS); - destRsrc1.bits.SGPRS = std::max(origRsrc1.bits.SGPRS, srcRsrc1.bits.SGPRS); - if (isGlueCode) { - // The float mode should come from the body of the shader and not the glue code. - destRsrc1.bits.FLOAT_MODE = origRsrc1.bits.FLOAT_MODE; - } - *destNode = srcNode.getDocument()->getNode(destRsrc1.u32All); - return 0; - } - case mmSPI_PS_INPUT_ENA: - case mmSPI_PS_INPUT_ADDR: - case mmSPI_PS_IN_CONTROL: - if (isGlueCode) - return 0; - break; // Use "default behavior for uint nodes" code below. - } - } else if (mapKey.isString()) { - // For .userdatalimit, register counts, and register limits, take the max value. - if (mapKey.getString() == Util::Abi::PipelineMetadataKey::UserDataLimit || - mapKey.getString() == Util::Abi::HardwareStageMetadataKey::SgprCount || - mapKey.getString() == Util::Abi::HardwareStageMetadataKey::SgprLimit || - mapKey.getString() == Util::Abi::HardwareStageMetadataKey::VgprCount || - mapKey.getString() == Util::Abi::HardwareStageMetadataKey::VgprLimit) { - *destNode = std::max(destNode->getUInt(), srcNode.getUInt()); - return 0; - } - // For .spillthreshold, take the min value. 
- if (mapKey.getString() == Util::Abi::PipelineMetadataKey::SpillThreshold) { - *destNode = std::min(destNode->getUInt(), srcNode.getUInt()); - return 0; - } - } - // Default behavior for uint nodes: "or" the values together. - *destNode = destNode->getUInt() | srcNode.getUInt(); - return 0; - }; - auto mergerNew = [isGlueCode](msgpack::DocNode *destNode, msgpack::DocNode srcNode, msgpack::DocNode mapKey) { // Allow array and map merging. if (srcNode.isMap() && destNode->isMap()) return 0; @@ -389,102 +273,25 @@ void PalMetadata::mergeFromBlob(llvm::StringRef blob, bool isGlueCode) { return 0; }; - bool success = false; - if (m_useRegisterFieldFormat) - success = m_document->readFromBlob(blob, /*multi=*/false, mergerNew); - else - success = m_document->readFromBlob(blob, /*multi=*/false, merger); + bool success = m_document->readFromBlob(blob, /*multi=*/false, merger); assert(success && "Bad PAL metadata format"); ((void)success); } -// ===================================================================================================================== -// Get the first user data register number for the given shader stage, taking into account what shader -// stages are present in the pipeline, and whether NGG is enabled. The first time this is called must be -// after PatchResourceCollect has run. -// -// @param stage : ShaderStageEnum -unsigned PalMetadata::getUserDataReg0(ShaderStageEnum stage) { - assert(!m_useRegisterFieldFormat); - if (m_userDataRegMapping[stage] != 0) - return m_userDataRegMapping[stage]; - - // Mapping not yet initialized. - // Set up ShaderStageEnum -> user data register mapping. 
- m_userDataRegMapping[ShaderStage::Compute] = mmCOMPUTE_USER_DATA_0; - m_userDataRegMapping[ShaderStage::Fragment] = mmSPI_SHADER_USER_DATA_PS_0; - m_userDataRegMapping[ShaderStage::Task] = mmCOMPUTE_USER_DATA_0; - m_userDataRegMapping[ShaderStage::Mesh] = mmSPI_SHADER_USER_DATA_GS_0; - - if (!m_pipelineState->getNggControl()->enableNgg) { - // GFX10+ not NGG: Same as GFX9, except ES-GS user data goes into GS registers. - m_userDataRegMapping[ShaderStage::CopyShader] = mmSPI_SHADER_USER_DATA_VS_0; - m_userDataRegMapping[ShaderStage::Geometry] = mmSPI_SHADER_USER_DATA_GS_0; - if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) - m_userDataRegMapping[ShaderStage::TessEval] = m_userDataRegMapping[ShaderStage::Geometry]; - else - m_userDataRegMapping[ShaderStage::TessEval] = mmSPI_SHADER_USER_DATA_VS_0; - m_userDataRegMapping[ShaderStage::TessControl] = mmSPI_SHADER_USER_DATA_HS_0; - if (m_pipelineState->hasShaderStage(ShaderStage::TessControl)) - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::TessControl]; - else if (m_pipelineState->hasShaderStage(ShaderStage::Geometry)) - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::Geometry]; - else - m_userDataRegMapping[ShaderStage::Vertex] = mmSPI_SHADER_USER_DATA_VS_0; - - } else { - // GFX10+ NGG - m_userDataRegMapping[ShaderStage::Geometry] = mmSPI_SHADER_USER_DATA_GS_0; - m_userDataRegMapping[ShaderStage::TessEval] = m_userDataRegMapping[ShaderStage::Geometry]; - m_userDataRegMapping[ShaderStage::TessControl] = mmSPI_SHADER_USER_DATA_HS_0; - if (m_pipelineState->hasShaderStage(ShaderStage::TessControl)) - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::TessControl]; - else - m_userDataRegMapping[ShaderStage::Vertex] = m_userDataRegMapping[ShaderStage::Geometry]; - } - - return m_userDataRegMapping[stage]; -} - -// 
===================================================================================================================== -// Set the PAL metadata SPI register for a number of consecutive user data entries -// -// @param stage : ShaderStageEnum -// @param userDataIndex : User data index 0-15 or 0-31 depending on HW and shader stage -// @param userDataValue : Value to store in that entry, one of: -// - a 0-based integer for the root user data dword offset -// - one of the UserDataMapping values, e.g. UserDataMapping::GlobalTable -// @param dwordCount : Number of user data entries to set -void PalMetadata::setUserDataEntry(ShaderStageEnum stage, unsigned userDataIndex, unsigned userDataValue, - unsigned dwordCount) { - assert(!m_useRegisterFieldFormat); - // Get the start register number of SPI user data registers for this shader stage. - unsigned userDataReg = getUserDataReg0(stage); - - // Assert that the supplied user data index is not too big. - bool inRange = userDataIndex + dwordCount <= 16; - if (stage != ShaderStage::Compute && stage != ShaderStage::Task) - inRange = userDataIndex + dwordCount <= 32; - assert(inRange && "Out of range user data index"); - (void(inRange)); // Unused - - // Update userDataLimit if userData is a 0-based integer for root user data dword offset. - if (userDataValue < InterfaceData::MaxSpillTableSize && userDataValue + dwordCount > m_userDataLimit->getUInt()) - *m_userDataLimit = userDataValue + dwordCount; - - // Write the register(s) - userDataReg += userDataIndex; - while (dwordCount--) - m_registers[userDataReg++] = userDataValue++; -} - // ===================================================================================================================== // Mark that the user data spill table is used at the given offset. The SpillThreshold PAL metadata entry is // set to the minimum of any call to this function in any shader. 
// // @param dwordOffset : Dword offset that the spill table is used at -void PalMetadata::setUserDataSpillUsage(unsigned dwordOffset) { +// @param shaderStage : The shaderStage for the spill table is used at +void PalMetadata::setUserDataSpillUsage(unsigned dwordOffset, std::optional shaderStage) { + // Update shaderstage spillThreshold in shader level which try to fix the cache issue + // More details can refer on llpcElfWriter.cpp + if (dwordOffset < m_pipelineState->getSpillThreshold(shaderStage.value())) { + m_pipelineState->setSpillThreshold(shaderStage.value(), dwordOffset); + } + // Update pipeline spillThreshold if (dwordOffset < m_spillThreshold->getUInt()) *m_spillThreshold = dwordOffset; } @@ -529,26 +336,13 @@ void PalMetadata::fixUpRegisters() { }; // Here we use register field to determine if NGG is enabled, because enabling NGG depends on other conditions. // see PatchResourceCollect::canUseNgg. - if (m_pipelineState->useRegisterFieldFormat()) { - auto graphicsRegisters = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); - if (graphicsRegisters.find(Util::Abi::GraphicsRegisterMetadataKey::VgtGsOutPrimType) != - graphicsRegisters.end()) { - auto primType = getPrimType(); - auto vgtGsOutPrimType = - graphicsRegisters[Util::Abi::GraphicsRegisterMetadataKey::VgtGsOutPrimType].getMap(true); - vgtGsOutPrimType[Util::Abi::VgtGsOutPrimTypeMetadataKey::OutprimType] = - serializeEnum(Util::Abi::GsOutPrimType(primType)); - } - } else { - unsigned vgtGsOutPrimType = mmVGT_GS_OUT_PRIM_TYPE; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) { - // NOTE: Register VGT_GS_OUT_PRIM_TYPE is a special one that has different HW offset on GFX11+. 
- vgtGsOutPrimType = mmVGT_GS_OUT_PRIM_TYPE_GFX11; - } - if (m_registers.find(m_document->getNode(vgtGsOutPrimType)) != m_registers.end()) { - auto primType = getPrimType(); - m_registers[vgtGsOutPrimType] = primType; - } + auto graphicsRegisters = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); + if (graphicsRegisters.find(Util::Abi::GraphicsRegisterMetadataKey::VgtGsOutPrimType) != graphicsRegisters.end()) { + auto primType = getPrimType(); + auto vgtGsOutPrimType = + graphicsRegisters[Util::Abi::GraphicsRegisterMetadataKey::VgtGsOutPrimType].getMap(true); + vgtGsOutPrimType[Util::Abi::VgtGsOutPrimTypeMetadataKey::OutprimType] = + serializeEnum(Util::Abi::GsOutPrimType(primType)); } } } @@ -600,59 +394,36 @@ void PalMetadata::finalizeUserDataLimit() { // for part-pipeline or shader compilation. void PalMetadata::finalizeRegisterSettings(bool isWholePipeline) { assert(m_pipelineState->isGraphics()); - if (m_pipelineState->useRegisterFieldFormat()) { - auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); - - if (m_pipelineState->getColorExportState().alphaToCoverageEnable) { - auto dbShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl].getMap(true); + auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); + + if (m_pipelineState->getShaderStageMask().contains(ShaderStage::Fragment)) { + // There is a divergence between Vulkan/OpenGL and D3D11/12 specs in terms of sample mask output and alpha to + // coverage. DX spec states that "Alpha to Coverage is disabled if this register (i.e. oMask) is written in a + // shader." In contrast, the VK spec allows (or rather, does not disallow) enabling both sample mask output and + // alpha to coverage at the same time. 
+ auto dbShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl].getMap(true); + bool alphaToCoverageEnable = m_pipelineState->getColorExportState().alphaToCoverageEnable; + if (m_pipelineState->getOptions().sampleMaskExportOverridesAlphaToCoverage) { + dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = + !alphaToCoverageEnable || dbShaderControl[Util::Abi::DbShaderControlMetadataKey::MaskExportEnable].getBool(); + } else if (alphaToCoverageEnable) { dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = false; } + } - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 10) { - WaveBreak waveBreakSize = m_pipelineState->getShaderOptions(ShaderStage::Fragment).waveBreakSize; - auto paScShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::PaScShaderControl].getMap(true); - paScShaderControl[Util::Abi::PaScShaderControlMetadataKey::WaveBreakRegionSize] = - static_cast(waveBreakSize); - } - - if (m_pipelineState->getRasterizerState().innerCoverage) - graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = - serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_INNER_COVERAGE)); - else - graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = - serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_COVERAGE)); - } else { - // Set PA_CL_CLIP_CNTL from pipeline state settings. - // DX_CLIP_SPACE_DEF, ZCLIP_NEAR_DISABLE and ZCLIP_FAR_DISABLE are now set internally by PAL (as of - // version 629), and are no longer part of the PAL ELF ABI. 
- const bool rasterizerDiscardEnable = m_pipelineState->getRasterizerState().rasterizerDiscardEnable; - PA_CL_CLIP_CNTL paClClipCntl = {}; - paClClipCntl.bits.DX_LINEAR_ATTR_CLIP_ENA = true; - paClClipCntl.bits.DX_RASTERIZATION_KILL = rasterizerDiscardEnable; - setRegister(mmPA_CL_CLIP_CNTL, paClClipCntl.u32All); - - if (m_pipelineState->getColorExportState().alphaToCoverageEnable) { - DB_SHADER_CONTROL dbShaderControl = {}; - dbShaderControl.u32All = getRegister(mmDB_SHADER_CONTROL); - dbShaderControl.bitfields.ALPHA_TO_MASK_DISABLE = 0; - setRegister(mmDB_SHADER_CONTROL, dbShaderControl.u32All); - } - - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 10) { - WaveBreak waveBreakSize = m_pipelineState->getShaderOptions(ShaderStage::Fragment).waveBreakSize; - PA_SC_SHADER_CONTROL paScShaderControl = {}; - paScShaderControl.gfx10.WAVE_BREAK_REGION_SIZE = static_cast(waveBreakSize); - setRegister(mmPA_SC_SHADER_CONTROL, paScShaderControl.u32All); - } - - PA_SC_AA_CONFIG paScAaConfig = {}; - if (m_pipelineState->getRasterizerState().innerCoverage) { - paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_INNER_COVERAGE; - } else { - paScAaConfig.bitfields.COVERAGE_TO_SHADER_SELECT = INPUT_COVERAGE; - } - setRegister(mmPA_SC_AA_CONFIG, paScAaConfig.u32All); + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major == 10) { + WaveBreak waveBreakSize = m_pipelineState->getShaderOptions(ShaderStage::Fragment).waveBreakSize; + auto paScShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::PaScShaderControl].getMap(true); + paScShaderControl[Util::Abi::PaScShaderControlMetadataKey::WaveBreakRegionSize] = + static_cast(waveBreakSize); } + + if (m_pipelineState->getRasterizerState().innerCoverage) + graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = + serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_INNER_COVERAGE)); + else + 
graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::AaCoverageToShaderSelect] = + serializeEnum(Util::Abi::CoverageToShaderSel(INPUT_COVERAGE)); } // ===================================================================================================================== @@ -679,29 +450,16 @@ void PalMetadata::finalizeInputControlRegisterSetting() { const bool usesViewportArrayIndex = usesViewportArrayIndexNode->getBool(); if (!usesViewportArrayIndex) { - if (m_useRegisterFieldFormat) { - auto spiPsInputCntl = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiPsInputCntl] - .getArray(true); - // Check if pointCoordLoc is not used - auto spiPsInputCntlElem = spiPsInputCntl[viewportIndexLoc].getMap(true); - if (!spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::PtSpriteTex].getBool()) { - // Use default value 0 for viewport array index if it is only used in FS (not set in other stages) - constexpr unsigned defaultVal = (1 << 5); - spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::Offset] = defaultVal; - spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::FlatShade] = false; - } - } else { - SPI_PS_INPUT_CNTL_0 spiPsInputCntl = {}; - spiPsInputCntl.u32All = getRegister(mmSPI_PS_INPUT_CNTL_0 + viewportIndexLoc); - // Check if pointCoordLoc is not used - if (!spiPsInputCntl.bits.PT_SPRITE_TEX) { - // Use default value 0 for viewport array index if it is only used in FS (not set in other stages) - constexpr unsigned defaultVal = (1 << 5); - spiPsInputCntl.bits.OFFSET = defaultVal; - spiPsInputCntl.bits.FLAT_SHADE = false; - setRegister(mmSPI_PS_INPUT_CNTL_0 + viewportIndexLoc, spiPsInputCntl.u32All); - } + auto spiPsInputCntl = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiPsInputCntl] + .getArray(true); + // Check if pointCoordLoc is not used + auto spiPsInputCntlElem = 
spiPsInputCntl[viewportIndexLoc].getMap(true); + if (!spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::PtSpriteTex].getBool()) { + // Use default value 0 for viewport array index if it is only used in FS (not set in other stages) + constexpr unsigned defaultVal = (1 << 5); + spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::Offset] = defaultVal; + spiPsInputCntlElem[Util::Abi::SpiPsInputCntlMetadataKey::FlatShade] = false; } } } @@ -737,32 +495,6 @@ void PalMetadata::finalizePipeline(bool isWholePipeline) { eraseFragmentInputInfo(); } -// ===================================================================================================================== -// Get a register value in PAL metadata. Returns 0 if the node does not have an entry. -// -// @param regNum : Register number -unsigned PalMetadata::getRegister(unsigned regNum) { - assert(!m_useRegisterFieldFormat); - auto mapIt = m_registers.find(m_document->getNode(regNum)); - if (mapIt == m_registers.end()) { - return 0; - } - msgpack::DocNode &node = mapIt->second; - assert(node.getKind() == msgpack::Type::UInt); - return node.getUInt(); -} - -// ===================================================================================================================== -// Set a register value in PAL metadata. -// -// @param regNum : Register number -// @param value : Value to set -void PalMetadata::setRegister(unsigned regNum, unsigned newValue) { - assert(!m_useRegisterFieldFormat); - msgpack::DocNode &node = m_registers[regNum]; - node = newValue; -} - // ===================================================================================================================== // Set userDataLimit to maximum (the size of the root user data table, excluding vertex buffer and streamout). 
// This is called if spill is in use, or if there are root user data nodes but none of them are used (PAL does @@ -848,67 +580,6 @@ void PalMetadata::eraseColorExportInfo() { m_pipelineNode.erase(m_document->getNode(PipelineMetadataKey::ColorExports)); } -// ===================================================================================================================== -// Get the VS entry register info. Used by the linker to generate the fetch shader. -// -// @param [out] regInfo : Where to store VS entry register info -void PalMetadata::getVsEntryRegInfo(VsEntryRegInfo ®Info) { - regInfo = {}; - std::string hwStageName; - regInfo.callingConv = getCallingConventionForFirstHardwareShaderStage(hwStageName); - unsigned sgprsBeforeUserData = getNumberOfSgprsBeforeUserData(regInfo.callingConv); - unsigned sgprsAfterUserData = getNumberOfSgprsAfterUserData(regInfo.callingConv); - - regInfo.vertexId = getVertexIdOffset(regInfo.callingConv); - regInfo.instanceId = getInstanceIdOffset(regInfo.callingConv); - regInfo.vgprCount = getVgprCount(regInfo.callingConv); - regInfo.wave32 = isWave32(regInfo.callingConv); - - if (m_useRegisterFieldFormat) { - auto hardwareStages = m_pipelineNode[Util::Abi::PipelineMetadataKey::HardwareStages].getMap(true); - assert(hardwareStages.find(hwStageName) != hardwareStages.end()); - auto hwStage = hardwareStages[hwStageName].getMap(true); - auto userDataRegMap = hwStage[Util::Abi::HardwareStageMetadataKey::UserDataRegMap].getArray(true); - regInfo.sgprCount = - sgprsBeforeUserData + hwStage[Util::Abi::HardwareStageMetadataKey::UserSgprs].getUInt() + sgprsAfterUserData; - regInfo.vertexBufferTable = - sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegMap, UserDataMapping::VertexBufferTable); - regInfo.baseVertex = sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegMap, UserDataMapping::BaseVertex); - regInfo.baseInstance = sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegMap, UserDataMapping::BaseInstance); - } else { - 
unsigned userDataReg0 = getFirstUserDataReg(regInfo.callingConv); - auto userDataRegIt = m_registers.find(m_document->getNode(userDataReg0)); - assert(userDataRegIt != m_registers.end()); - regInfo.sgprCount = sgprsBeforeUserData + getUserDataCount(regInfo.callingConv) + sgprsAfterUserData; - regInfo.vertexBufferTable = - sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegIt, UserDataMapping::VertexBufferTable); - regInfo.baseVertex = sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegIt, UserDataMapping::BaseVertex); - regInfo.baseInstance = sgprsBeforeUserData + getOffsetOfUserDataReg(userDataRegIt, UserDataMapping::BaseInstance); - } -} - -// ===================================================================================================================== -// Returns the number of user data registers are used for the shader with the given calling convention. -// -// @param callingConv : The calling convention of the shader we are interested in. -unsigned PalMetadata::getUserDataCount(unsigned callingConv) { - assert(!m_useRegisterFieldFormat); - // Should we just define mmSPI_SHADER_PGM_RSRC2_* for all shader types? The comment before the definition of - // mmSPI_SHADER_PGM_RSRC2_VS in AbiMetadata.h says that do not want to. - static const ArrayMap callingConvToRsrc2Map = {{CallingConv::AMDGPU_LS, mmSPI_SHADER_USER_DATA_LS_0 - 1}, - {CallingConv::AMDGPU_HS, mmSPI_SHADER_USER_DATA_HS_0 - 1}, - {CallingConv::AMDGPU_ES, mmSPI_SHADER_USER_DATA_ES_0 - 1}, - {CallingConv::AMDGPU_GS, mmSPI_SHADER_USER_DATA_GS_0 - 1}, - {CallingConv::AMDGPU_VS, mmSPI_SHADER_USER_DATA_VS_0 - 1}}; - - SPI_SHADER_PGM_RSRC2 rsrc2; - rsrc2.u32All = m_registers[m_document->getNode(findValueInArrayMap(callingConvToRsrc2Map, callingConv))].getUInt(); - - // For GFX9+, we ignore the USER_SGPR_MSB field. We know that there is at least one user SGPR, so if we find that - // USER_SGPR is 0, it must mean 32. - return rsrc2.bits.USER_SGPR == 0 ? 
32 : rsrc2.bits.USER_SGPR; -} - // ===================================================================================================================== // Get the llvm type for that corresponds to tyName. Returns nullptr if no such type exists. // @@ -947,66 +618,53 @@ void PalMetadata::updateSpiShaderColFormat(ArrayRef expFormats) { for (auto [i, expFormat] : enumerate(expFormats)) spiShaderColFormat |= expFormat << (4 * i); - if (m_pipelineState->useRegisterFieldFormat()) { - auto spiShaderColFormatNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderColFormat] - .getMap(true); - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_0ExportFormat] = spiShaderColFormat & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_1ExportFormat] = - (spiShaderColFormat >> 4) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_2ExportFormat] = - (spiShaderColFormat >> 8) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_3ExportFormat] = - (spiShaderColFormat >> 12) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_4ExportFormat] = - (spiShaderColFormat >> 16) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_5ExportFormat] = - (spiShaderColFormat >> 20) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_6ExportFormat] = - (spiShaderColFormat >> 24) & 0xF; - spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_7ExportFormat] = - (spiShaderColFormat >> 28) & 0xF; - } else { - setRegister(mmSPI_SHADER_COL_FORMAT, spiShaderColFormat); - } + auto spiShaderColFormatNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderColFormat] + .getMap(true); + 
spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_0ExportFormat] = spiShaderColFormat & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_1ExportFormat] = (spiShaderColFormat >> 4) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_2ExportFormat] = (spiShaderColFormat >> 8) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_3ExportFormat] = + (spiShaderColFormat >> 12) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_4ExportFormat] = + (spiShaderColFormat >> 16) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_5ExportFormat] = + (spiShaderColFormat >> 20) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_6ExportFormat] = + (spiShaderColFormat >> 24) & 0xF; + spiShaderColFormatNode[Util::Abi::SpiShaderColFormatMetadataKey::Col_7ExportFormat] = + (spiShaderColFormat >> 28) & 0xF; } // ===================================================================================================================== // Updates the CB_SHADER_MASK entry. 
// void PalMetadata::updateCbShaderMask(unsigned cbShaderMask) { - if (m_pipelineState->useRegisterFieldFormat()) { - auto cbShaderMaskNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::CbShaderMask] - .getMap(true); - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output0Enable] = cbShaderMask & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output1Enable] = (cbShaderMask >> 4) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output2Enable] = (cbShaderMask >> 8) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output3Enable] = (cbShaderMask >> 12) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output4Enable] = (cbShaderMask >> 16) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output5Enable] = (cbShaderMask >> 20) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output6Enable] = (cbShaderMask >> 24) & 0xF; - cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output7Enable] = (cbShaderMask >> 28) & 0xF; - } else { - setRegister(mmCB_SHADER_MASK, cbShaderMask); - } + auto cbShaderMaskNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] + .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::CbShaderMask] + .getMap(true); + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output0Enable] = cbShaderMask & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output1Enable] = (cbShaderMask >> 4) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output2Enable] = (cbShaderMask >> 8) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output3Enable] = (cbShaderMask >> 12) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output4Enable] = (cbShaderMask >> 16) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output5Enable] = (cbShaderMask >> 20) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output6Enable] = 
(cbShaderMask >> 24) & 0xF; + cbShaderMaskNode[Util::Abi::CbShaderMaskMetadataKey::Output7Enable] = (cbShaderMask >> 28) & 0xF; } // ===================================================================================================================== // Updates the DB shader control that depends on the CB state. // void PalMetadata::updateDbShaderControl() { - if (m_pipelineState->useRegisterFieldFormat()) { - auto dbShaderControl = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl] - .getMap(true); + auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); + auto dbShaderControl = graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::DbShaderControl].getMap(true); + bool alphaToCoverageEnable = m_pipelineState->getColorExportState().alphaToCoverageEnable; + if (m_pipelineState->getOptions().sampleMaskExportOverridesAlphaToCoverage) { dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = - !m_pipelineState->getColorExportState().alphaToCoverageEnable; + !alphaToCoverageEnable || dbShaderControl[Util::Abi::DbShaderControlMetadataKey::MaskExportEnable].getBool(); } else { - DB_SHADER_CONTROL dbShaderControl = {}; - dbShaderControl.u32All = getRegister(mmDB_SHADER_CONTROL); - dbShaderControl.bitfields.ALPHA_TO_MASK_DISABLE = !m_pipelineState->getColorExportState().alphaToCoverageEnable; - setRegister(mmDB_SHADER_CONTROL, dbShaderControl.u32All); + dbShaderControl[Util::Abi::DbShaderControlMetadataKey::AlphaToMaskDisable] = !alphaToCoverageEnable; } } @@ -1015,14 +673,8 @@ void PalMetadata::updateDbShaderControl() { // // @param zExportFormat : new z-export-format void PalMetadata::setSpiShaderZFormat(unsigned zExportFormat) { - if (m_pipelineState->useRegisterFieldFormat()) { - auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); - 
graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderZFormat] = zExportFormat; - } else { - SPI_SHADER_Z_FORMAT spiShaderZFormat = {}; - spiShaderZFormat.bitfields.Z_EXPORT_FORMAT = zExportFormat; - setRegister(mmSPI_SHADER_Z_FORMAT, spiShaderZFormat.u32All); - } + auto graphicsRegNode = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters].getMap(true); + graphicsRegNode[Util::Abi::GraphicsRegisterMetadataKey::SpiShaderZFormat] = zExportFormat; } // ===================================================================================================================== @@ -1180,218 +832,6 @@ unsigned PalMetadata::getFragmentShaderBuiltInLoc(unsigned builtin) { return InvalidValue; } -// ===================================================================================================================== -// Returns the calling convention of the first hardware shader stage that will be executed in the pipeline. -// -// @param hwStageName : The hardware stage name that is filled in with the new register field format. 
-unsigned PalMetadata::getCallingConventionForFirstHardwareShaderStage(std::string &hwStageName) { - if (m_useRegisterFieldFormat) { - auto hardwareStages = m_pipelineNode[Util::Abi::PipelineMetadataKey::HardwareStages].getMap(true); - hwStageName = HwStageNames[static_cast(Util::Abi::HardwareStage::Hs)]; - if (hardwareStages.find(hwStageName) != hardwareStages.end()) - return CallingConv::AMDGPU_HS; - - hwStageName = HwStageNames[static_cast(Util::Abi::HardwareStage::Gs)]; - if (hardwareStages.find(hwStageName) != hardwareStages.end()) - return CallingConv::AMDGPU_GS; - - hwStageName = HwStageNames[static_cast(Util::Abi::HardwareStage::Vs)]; - if (hardwareStages.find(hwStageName) != hardwareStages.end()) - return CallingConv::AMDGPU_VS; - - hwStageName = HwStageNames[static_cast(Util::Abi::HardwareStage::Cs)]; - return CallingConv::AMDGPU_CS; - } - constexpr unsigned hwShaderStageCount = 6; - static const std::pair shaderTable[hwShaderStageCount] = { - {mmSPI_SHADER_PGM_RSRC1_LS, CallingConv::AMDGPU_LS}, {mmSPI_SHADER_PGM_RSRC1_HS, CallingConv::AMDGPU_HS}, - {mmSPI_SHADER_PGM_RSRC1_ES, CallingConv::AMDGPU_ES}, {mmSPI_SHADER_PGM_RSRC1_GS, CallingConv::AMDGPU_GS}, - {mmSPI_SHADER_PGM_RSRC1_VS, CallingConv::AMDGPU_VS}, {mmCOMPUTE_PGM_RSRC1, CallingConv::AMDGPU_CS}}; - - for (unsigned i = 0; i < hwShaderStageCount; ++i) { - auto entry = m_registers.find(m_document->getNode(shaderTable[i].first)); - if (entry != m_registers.end()) - return shaderTable[i].second; - } - return CallingConv::AMDGPU_CS; -} - -// ===================================================================================================================== -// Returns the offset of the first user data register for the given calling convention. 
-// -// @param callingConv : The calling convention -unsigned PalMetadata::getFirstUserDataReg(unsigned callingConv) { - assert(!m_useRegisterFieldFormat); - static const ArrayMap shaderTable = { - {CallingConv::AMDGPU_LS, mmSPI_SHADER_USER_DATA_LS_0}, {CallingConv::AMDGPU_HS, mmSPI_SHADER_USER_DATA_HS_0}, - {CallingConv::AMDGPU_ES, mmSPI_SHADER_USER_DATA_ES_0}, {CallingConv::AMDGPU_GS, mmSPI_SHADER_USER_DATA_GS_0}, - {CallingConv::AMDGPU_VS, mmSPI_SHADER_USER_DATA_VS_0}, {CallingConv::AMDGPU_CS, mmCOMPUTE_PGM_RSRC1}}; - - ArrayRef currentShaderTable(shaderTable); - return findValueInArrayMap(currentShaderTable, callingConv); -} - -// ===================================================================================================================== -// Returns to number of SGPRs before the SGPRs used for other purposes before the first user data SGPRs. -// -// @param callingConv : The calling convention of the shader -unsigned PalMetadata::getNumberOfSgprsBeforeUserData(unsigned callingConv) { - switch (callingConv) { - case CallingConv::AMDGPU_CS: - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_PS: - return 0; - default: - // Merged shader have an extra 8 SGPRs before user data. - return 8; - } -} - -// ===================================================================================================================== -// Returns the offset of the userDataMapping from firstUserDataNode. Returns UINT_MAX if it cannot be found. -// -// @param firstUserDataNode : An iterator identifying the starting point for the search. It is assumed that it points -// to the first user data node for some shader stage. -// @param userDataMapping : The user data mapping that is being search for. 
-// -unsigned PalMetadata::getOffsetOfUserDataReg(std::map::iterator firstUserDataNode, - UserDataMapping userDataMapping) { - assert(!m_useRegisterFieldFormat); - unsigned firstReg = firstUserDataNode->first.getUInt(); - unsigned lastReg = firstReg + m_pipelineState->getTargetInfo().getGpuProperty().maxUserDataCount; - for (auto &userDataNode : make_range(firstUserDataNode, m_registers.end())) { - unsigned reg = userDataNode.first.getUInt(); - if (reg >= lastReg) - return UINT_MAX; - if (static_cast(userDataNode.second.getUInt()) == userDataMapping) - return reg - firstReg; - } - return UINT_MAX; -} - -// ===================================================================================================================== -// Returns the offset of the userDataMapping from firstUserDataNode. Returns UINT_MAX if it cannot be found. -// -// @param userDataReg : -// @param userDataMapping : The user data mapping that is being search for. -unsigned PalMetadata::getOffsetOfUserDataReg(msgpack::ArrayDocNode &userDataReg, UserDataMapping userDataMapping) { - for (unsigned id = 0; id < userDataReg.size(); ++id) { - if (userDataReg[id].getUInt() == static_cast(userDataMapping)) - return id; - } - return UINT_MAX; -} - -// ===================================================================================================================== -// Returns the upper bound on the number of SGPRs that contain parameters for the shader after the user data. -// -// @param callingConv : The calling convention of the shader stage -unsigned PalMetadata::getNumberOfSgprsAfterUserData(unsigned callingConv) { - // Conservatively set the total number of input SGPRs. A merged shader with 8 SGPRs before user data - // does not have any extra ones after; an unmerged shader has up to 10 SGPRs after. 
- if (getNumberOfSgprsBeforeUserData(callingConv) == 0) - return 10; - return 0; -} - -// ===================================================================================================================== -// Returns the offset of the vertex id in the parameter vertex registers. -// -// @param callingConv : The calling convention of the shader stage -unsigned PalMetadata::getVertexIdOffset(unsigned callingConv) { - switch (callingConv) { - case CallingConv::AMDGPU_LS: // Before-GFX9 unmerged LS - case CallingConv::AMDGPU_ES: // Before-GFX9 unmerged ES - case CallingConv::AMDGPU_VS: - return 0; - case CallingConv::AMDGPU_HS: // GFX9+ LS+HS - return 2; - case CallingConv::AMDGPU_GS: // GFX9+ ES+GS - return 5; - default: - llvm_unreachable("Unexpected calling convention."); - return UINT_MAX; - } -} - -// ===================================================================================================================== -// Returns the offset of the register containing the instance id in the shader. -// -// @param callingConv : The calling convention of the shader stage -unsigned PalMetadata::getInstanceIdOffset(unsigned callingConv) { - switch (callingConv) { - case CallingConv::AMDGPU_LS: // Before-GFX9 unmerged LS - case CallingConv::AMDGPU_ES: // Before-GFX9 unmerged ES - case CallingConv::AMDGPU_VS: - return 3; - case CallingConv::AMDGPU_HS: // GFX9+ LS+HS - return 5; - case CallingConv::AMDGPU_GS: // GFX9+ ES+GS - return 8; - default: - llvm_unreachable("Unexpected calling convention."); - return UINT_MAX; - } -} - -// ===================================================================================================================== -// Returns the number of VGPRs used for inputs to the shader. 
-// -// @param callingConv : The calling convention of the shader stage -unsigned PalMetadata::getVgprCount(unsigned callingConv) { - switch (callingConv) { - case CallingConv::AMDGPU_LS: // Before-GFX9 unmerged LS - case CallingConv::AMDGPU_ES: // Before-GFX9 unmerged ES - case CallingConv::AMDGPU_VS: - return 4; - case CallingConv::AMDGPU_HS: // GFX9+ LS+HS - return 6; - case CallingConv::AMDGPU_GS: // GFX9+ ES+GS - return 9; - default: - llvm_unreachable("Unexpected calling convention."); - return UINT_MAX; - } -} - -// ===================================================================================================================== -// Returns true if the shader runs in wave32 mode. -// -// @param callingConv : The calling convention of the shader stage -bool PalMetadata::isWave32(unsigned callingConv) { - assert(m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 10); - - if (m_useRegisterFieldFormat) { - auto vgtShaderStagesEn = m_pipelineNode[Util::Abi::PipelineMetadataKey::GraphicsRegisters] - .getMap(true)[Util::Abi::GraphicsRegisterMetadataKey::VgtShaderStagesEn] - .getMap(true); - switch (callingConv) { - case CallingConv::AMDGPU_VS: - return vgtShaderStagesEn[Util::Abi::VgtShaderStagesEnMetadataKey::VsW32En].getBool(); - case CallingConv::AMDGPU_HS: // GFX9+ LS+HS - return vgtShaderStagesEn[Util::Abi::VgtShaderStagesEnMetadataKey::HsW32En].getBool(); - case CallingConv::AMDGPU_GS: // GFX9+ ES+GS - return vgtShaderStagesEn[Util::Abi::VgtShaderStagesEnMetadataKey::GsW32En].getBool(); - default: - llvm_unreachable("Unexpected calling convention."); - return false; - } - } - VGT_SHADER_STAGES_EN vgtShaderStagesEn; - vgtShaderStagesEn.u32All = m_registers[m_document->getNode(mmVGT_SHADER_STAGES_EN)].getUInt(); - switch (callingConv) { - case CallingConv::AMDGPU_VS: - return vgtShaderStagesEn.gfx10.VS_W32_EN; - case CallingConv::AMDGPU_HS: // GFX9+ LS+HS - return vgtShaderStagesEn.gfx10.HS_W32_EN; - case CallingConv::AMDGPU_GS: // GFX9+ ES+GS - 
return vgtShaderStagesEn.gfx10.GS_W32_EN; - default: - llvm_unreachable("Unexpected calling convention."); - return false; - } -} - // ===================================================================================================================== // Serialize Util::Abi::CoverageToShaderSel to a string. // @@ -1464,3 +904,9 @@ void PalMetadata::setUserDataLimit(unsigned value) { if (value > m_userDataLimit->getUInt()) *m_userDataLimit = value; } + +// ===================================================================================================================== +// Set Util::Abi::PipelineMetadataKey::PsDummyExport to true +void PalMetadata::setPsDummyExport() { + m_pipelineNode[Util::Abi::PipelineMetadataKey::PsDummyExport] = true; +} diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index e5895c0421..90148161f7 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -33,6 +33,7 @@ #include "lgc/LgcContext.h" #include "lgc/PassManager.h" #include "lgc/patch/FragColorExport.h" +#include "lgc/state/AbiMetadata.h" #include "lgc/state/PalMetadata.h" #include "lgc/state/TargetInfo.h" #include "lgc/util/Internal.h" @@ -54,9 +55,6 @@ static cl::opt EnableTessOffChip("enable-tess-offchip", cl::desc("Enable t static cl::opt EnableRowExport("enable-row-export", cl::desc("Enable row export for mesh shader"), cl::init(true)); -cl::opt UseRegisterFieldFormat("use-register-field-format", cl::desc("Use register field format in pipeline ELF"), - cl::init(true)); - // Names for named metadata nodes when storing and reading back pipeline state static const char UnlinkedMetadataName[] = "lgc.unlinked"; static const char PreRasterHasGsMetadataName[] = "lgc.prerast.has.gs"; @@ -331,7 +329,6 @@ ComputeShaderMode Pipeline::getComputeShaderMode(Module &module) { // @param emitLgc : Whether the option -emit-lgc is on PipelineState::PipelineState(LgcContext *builderContext, bool emitLgc) : Pipeline(builderContext), m_emitLgc(emitLgc), 
m_meshRowExport(EnableRowExport) { - m_registerFieldFormat = UseRegisterFieldFormat; m_tessLevel.inner[0] = -1.0f; m_tessLevel.inner[1] = -1.0f; m_tessLevel.outer[0] = -1.0f; @@ -368,7 +365,7 @@ unsigned PipelineState::getPalAbiVersion() const { // Get PalMetadata object, creating an empty one if necessary PalMetadata *PipelineState::getPalMetadata() { if (!m_palMetadata) - m_palMetadata = new PalMetadata(this, m_registerFieldFormat); + m_palMetadata = new PalMetadata(this); return m_palMetadata; } @@ -386,7 +383,7 @@ void PipelineState::clearPalMetadata() { // @param isGlueCode : True if the blob was generated for glue code. void PipelineState::mergePalMetadataFromBlob(StringRef blob, bool isGlueCode) { if (!m_palMetadata) - m_palMetadata = new PalMetadata(this, blob, m_registerFieldFormat); + m_palMetadata = new PalMetadata(this, blob); else m_palMetadata->mergeFromBlob(blob, isGlueCode); } @@ -474,7 +471,7 @@ void PipelineState::readState(Module *module) { readColorExportState(module); readGraphicsState(module); if (!m_palMetadata) - m_palMetadata = new PalMetadata(this, module, m_registerFieldFormat); + m_palMetadata = new PalMetadata(this, module); setXfbStateMetadata(module); } @@ -1364,7 +1361,6 @@ unsigned PipelineState::getShaderWaveSize(ShaderStageEnum stage) { // // @param stage : Shader stage unsigned PipelineState::getMergedShaderWaveSize(ShaderStageEnum stage) { - assert(getTargetInfo().getGfxIpVersion().major >= 10); unsigned waveSize = m_waveSize[stage]; // NOTE: For GFX9+, two shaders are merged as a shader pair. The wave size is determined by the larger one. That is @@ -1408,6 +1404,114 @@ unsigned PipelineState::getMergedShaderWaveSize(ShaderStageEnum stage) { } } +// ===================================================================================================================== +// Builds mapping of ShaderStageEnum to Util::Abi::HardwareStageFlagBits for pipeline stages. +// Assigns an Util::Abi::PipelineType to the pipeline. 
+void PipelineState::buildAbiHwShaderMap() { + assert(isGraphics()); + + const bool hasVs = hasShaderStage(ShaderStage::Vertex); + const bool hasTcs = hasShaderStage(ShaderStage::TessControl); + const bool hasTes = hasShaderStage(ShaderStage::TessEval); + const bool hasTs = hasTcs || hasTes; + const bool hasGs = hasShaderStage(ShaderStage::Geometry); + const bool hasTask = hasShaderStage(ShaderStage::Task); + const bool hasMesh = hasShaderStage(ShaderStage::Mesh); + + m_abiPipelineType = Util::Abi::PipelineType::VsPs; + m_abiHwShaderMap.clear(); + + if (hasTask || hasMesh) { + assert(getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3})); + if (hasMesh) { + m_abiHwShaderMap[ShaderStage::Mesh] = Util::Abi::HwShaderGs; + m_abiPipelineType = Util::Abi::PipelineType::Mesh; + } + if (hasTask) { + m_abiHwShaderMap[ShaderStage::Task] = Util::Abi::HwShaderCs; + m_abiPipelineType = Util::Abi::PipelineType::TaskMesh; + } + } else { + if (hasGs) { + auto preGsStage = getPrevShaderStage(ShaderStage::Geometry); + if (preGsStage != ShaderStage::Invalid) + m_abiHwShaderMap[preGsStage] = Util::Abi::HwShaderGs; + } + if (hasTcs) { + m_abiHwShaderMap[ShaderStage::TessControl] = Util::Abi::HwShaderHs; + if (hasVs) + m_abiHwShaderMap[ShaderStage::Vertex] = Util::Abi::HwShaderHs; + } + + auto lastVertexProcessingStage = getLastVertexProcessingStage(); + if (lastVertexProcessingStage != ShaderStage::Invalid) { + if (lastVertexProcessingStage == ShaderStage::CopyShader) + lastVertexProcessingStage = ShaderStage::Geometry; + if (isNggEnabled()) { + m_abiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderGs; + m_abiPipelineType = hasTs ? 
Util::Abi::PipelineType::NggTess : Util::Abi::PipelineType::Ngg; + } else { + m_abiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderVs; + if (hasGs) + m_abiHwShaderMap[lastVertexProcessingStage] |= Util::Abi::HwShaderGs; + + if (hasTs && hasGs) + m_abiPipelineType = Util::Abi::PipelineType::GsTess; + else if (hasTs) + m_abiPipelineType = Util::Abi::PipelineType::Tess; + else if (hasGs) + m_abiPipelineType = Util::Abi::PipelineType::Gs; + else + m_abiPipelineType = Util::Abi::PipelineType::VsPs; + } + } + } + if (hasShaderStage(ShaderStage::Fragment)) + m_abiHwShaderMap[ShaderStage::Fragment] = Util::Abi::HwShaderPs; +} + +// ===================================================================================================================== +// Gets ABI pipeline type for pipeline. +// +// @return Util::Abi::PipelineType as unsigned +unsigned PipelineState::getAbiPipelineType() { + if (m_abiHwShaderMap.empty()) + buildAbiHwShaderMap(); + return m_abiPipelineType; +} + +// ===================================================================================================================== +// Gets map of shader stages to hardware stage masks. +// Computes the mapping if required. +// +// @return map from ShaderStageEnum to Util::Abi::HardwareStageFlagBits +const DenseMap *PipelineState::getAbiHwShaderMap() { + if (m_abiHwShaderMap.empty()) + buildAbiHwShaderMap(); + return &m_abiHwShaderMap; +} + +// ===================================================================================================================== +// Gets hardware stage for a given shader stage after merging. 
+// +// @param stage : Shader stage +// @return unsigned representing Util::Abi::HardwareStageFlagBits of shader stage +unsigned PipelineState::getShaderHwStageMask(ShaderStageEnum stage) { + if (!isGraphics()) + return static_cast(Util::Abi::HwShaderCs); + + auto &abiHwShaderMap = *getAbiHwShaderMap(); + if (stage == ShaderStage::CopyShader) + stage = ShaderStage::Geometry; + + auto hwStageMask = abiHwShaderMap.find(stage); + if (hwStageMask != abiHwShaderMap.end()) + return hwStageMask->second; + + // Return no hardware stages enabled. + return 0; +} + // ===================================================================================================================== // Get subgroup size for the specified shader stage. // @@ -1432,8 +1536,7 @@ unsigned PipelineState::getShaderSubgroupSize(ShaderStageEnum stage) { // @param stage : Shader stage void PipelineState::setShaderDefaultWaveSize(ShaderStageEnum stage) { ShaderStageEnum checkingStage = stage; - const bool isGfx10Plus = getTargetInfo().getGfxIpVersion().major >= 10; - if (isGfx10Plus && stage == ShaderStage::Geometry && !hasShaderStage(ShaderStage::Geometry)) { + if (stage == ShaderStage::Geometry && !hasShaderStage(ShaderStage::Geometry)) { // NOTE: For NGG, GS could be absent and VS/TES acts as part of it in the merged shader. // In such cases, we check the property of VS or TES. checkingStage = hasShaderStage(ShaderStage::TessEval) ? ShaderStage::TessEval : ShaderStage::Vertex; @@ -1446,76 +1549,75 @@ void PipelineState::setShaderDefaultWaveSize(ShaderStageEnum stage) { unsigned waveSize = getTargetInfo().getGpuProperty().waveSize; unsigned subgroupSize = waveSize; - if (isGfx10Plus) { - // NOTE: GPU property wave size is used in shader, unless: - // 1) A stage-specific default is preferred. - // 2) If specified by tuning option, use the specified wave size. - // 3) If gl_SubgroupSize is used in shader, use the specified subgroup size when required. 
- // 4) If gl_SubgroupSize is not used in the (mesh/task/compute) shader, and the workgroup size is - // not larger than 32, use wave size 32. - - if (checkingStage == ShaderStage::Fragment) { - // Per programming guide, it's recommended to use wave64 for fragment shader. - waveSize = 64; - } else if (hasShaderStage(ShaderStage::Geometry)) { - // Legacy (non-NGG) hardware path for GS does not support wave32. - waveSize = 64; - if (getTargetInfo().getGfxIpVersion().major >= 11) - waveSize = 32; - } - - // Experimental data from performance tuning show that wave64 is more efficient than wave32 in most cases for CS - // on post-GFX10.3. Hence, set the wave size to wave64 by default. - if (getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3}) && stage == ShaderStage::Compute) - waveSize = 64; - - // Prefer wave64 on GFX11+ - if (getTargetInfo().getGfxIpVersion() >= GfxIpVersion({11})) - waveSize = 64; - - unsigned waveSizeOption = getShaderOptions(checkingStage).waveSize; - if (waveSizeOption != 0) - waveSize = waveSizeOption; - - // Note: the conditions below override the tuning option. - // If workgroup size is not larger than 32, use wave size 32. - if (checkingStage == ShaderStage::Mesh || checkingStage == ShaderStage::Task || - checkingStage == ShaderStage::Compute) { - unsigned workGroupSize; - if (checkingStage == ShaderStage::Mesh) { - auto &mode = m_shaderModes.getMeshShaderMode(); - workGroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ; - } else { - assert(checkingStage == ShaderStage::Task || checkingStage == ShaderStage::Compute); - auto &mode = m_shaderModes.getComputeShaderMode(); - workGroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ; - } + // NOTE: GPU property wave size is used in shader, unless: + // 1) A stage-specific default is preferred. + // 2) If specified by tuning option, use the specified wave size. 
+ // 3) If gl_SubgroupSize is used in shader, use the specified subgroup size when required. + // 4) If gl_SubgroupSize is not used in the (mesh/task/compute) shader, and the workgroup size is + // not larger than 32, use wave size 32. + + if (checkingStage == ShaderStage::Fragment) { + // Per programming guide, it's recommended to use wave64 for fragment shader. + waveSize = 64; + } else if (hasShaderStage(ShaderStage::Geometry)) { + // Legacy (non-NGG) hardware path for GS does not support wave32. + waveSize = 64; + if (getTargetInfo().getGfxIpVersion().major >= 11) + waveSize = 32; + } - if (workGroupSize <= 32) - waveSize = 32; + // Experimental data from performance tuning show that wave64 is more efficient than wave32 in most cases for CS + // on post-GFX10.3. Hence, set the wave size to wave64 by default. + if (getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3}) && stage == ShaderStage::Compute) + waveSize = 64; + + // Prefer wave64 on GFX11+ + if (getTargetInfo().getGfxIpVersion() >= GfxIpVersion({11})) + waveSize = 64; + + unsigned waveSizeOption = getShaderOptions(checkingStage).waveSize; + if (waveSizeOption != 0) + waveSize = waveSizeOption; + + // Note: the conditions below override the tuning option. + // If workgroup size is not larger than 32, use wave size 32. + if (checkingStage == ShaderStage::Mesh || checkingStage == ShaderStage::Task || + checkingStage == ShaderStage::Compute) { + unsigned workGroupSize; + if (checkingStage == ShaderStage::Mesh) { + auto &mode = m_shaderModes.getMeshShaderMode(); + workGroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ; + } else { + assert(checkingStage == ShaderStage::Task || checkingStage == ShaderStage::Compute); + auto &mode = m_shaderModes.getComputeShaderMode(); + workGroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ; } - // If subgroup size is used in any shader in the pipeline, use the specified subgroup size. 
- if (m_shaderModes.getAnyUseSubgroupSize()) { - // If allowVaryWaveSize is enabled, subgroupSize is default as zero, initialized as waveSize - subgroupSize = getShaderOptions(checkingStage).subgroupSize; - // The driver only sets waveSize if a size is requested by an app. We may want to change that in the driver to - // set subgroupSize instead. - if (subgroupSize == 0) - subgroupSize = getShaderOptions(checkingStage).waveSize; - if (subgroupSize == 0) - subgroupSize = waveSize; - - if ((subgroupSize < waveSize) || getOptions().fullSubgroups) - waveSize = subgroupSize; - } else { - // The subgroup size cannot be observed, use the wave size. + if (workGroupSize <= 32) + waveSize = 32; + } + + // If subgroup size is used in any shader in the pipeline, use the specified subgroup size. + if (m_shaderModes.getAnyUseSubgroupSize()) { + // If allowVaryWaveSize is enabled, subgroupSize is default as zero, initialized as waveSize + subgroupSize = getShaderOptions(checkingStage).subgroupSize; + // The driver only sets waveSize if a size is requested by an app. We may want to change that in the driver to + // set subgroupSize instead. + if (subgroupSize == 0) + subgroupSize = getShaderOptions(checkingStage).waveSize; + if (subgroupSize == 0) subgroupSize = waveSize; - } - assert(waveSize == 32 || waveSize == 64); - assert(waveSize <= subgroupSize); + if ((subgroupSize < waveSize) || getOptions().fullSubgroups) + waveSize = subgroupSize; + } else { + // The subgroup size cannot be observed, use the wave size. 
+ subgroupSize = waveSize; } + + assert(waveSize == 32 || waveSize == 64); + assert(waveSize <= subgroupSize); + m_waveSize[checkingStage] = waveSize; m_subgroupSize[checkingStage] = subgroupSize; } @@ -1541,6 +1643,15 @@ bool PipelineState::getShaderWgpMode(ShaderStageEnum stage) const { return m_shaderOptions[stage].wgpMode; } +// ===================================================================================================================== +// Checks if NGG is enabled for the pipeline +bool PipelineState::isNggEnabled() const { + auto gfxIp = getTargetInfo().getGfxIpVersion(); + if (gfxIp.major >= 11) + return true; + return m_nggControl.enableNgg; +} + // ===================================================================================================================== // Checks if SW-emulated mesh pipeline statistics is needed bool PipelineState::needSwMeshPipelineStats() const { @@ -1612,6 +1723,29 @@ InterfaceData *PipelineState::getShaderInterfaceData(ShaderStageEnum shaderStage return &*intfData; } +// ===================================================================================================================== +// Gets static LDS usage of the specified shader stage in dwords +// Note: does not consider shader merging. +// +// @param shaderStage : Shader stage +// @param rtStack : Get size of LDS RayQuery stack for this stage +// @return LDS size in dwords +unsigned PipelineState::getShaderStaticLdsUsage(ShaderStageEnum shaderStage, bool rtStack) { + const ResourceUsage *RU = getShaderResourceUsage(shaderStage); + switch (shaderStage) { + case ShaderStage::TessControl: { + const auto &calcFactor = RU->inOutUsage.tcs.calcFactor; + return rtStack ? calcFactor.rayQueryLdsStackSize : calcFactor.tessOnChipLdsSize; + } + case ShaderStage::Geometry: { + const auto &calcFactor = RU->inOutUsage.gs.calcFactor; + return rtStack ? 
calcFactor.rayQueryLdsStackSize : calcFactor.gsOnChipLdsSize; + } + default: + return 0; + } +} + // ===================================================================================================================== // Compute the ExportFormat (as an opaque int) of the specified color export location with the specified output // type. Only the number of elements of the type is significant. diff --git a/lgc/state/RayTracingLibrarySummary.cpp b/lgc/state/RayTracingLibrarySummary.cpp index fe38599edf..9ac4fa0eab 100644 --- a/lgc/state/RayTracingLibrarySummary.cpp +++ b/lgc/state/RayTracingLibrarySummary.cpp @@ -46,6 +46,7 @@ static constexpr char KnownSetRayFlags[] = "ray_flags_known_set"; static constexpr char KnownUnsetRayFlags[] = "ray_flags_known_unset"; static constexpr char MaxRayPayloadSize[] = "max_ray_payload_size"; static constexpr char MaxHitAttributeSize[] = "max_hit_attribute_size"; +static constexpr char MaxUsedPayloadRegisterCount[] = "max_used_payload_register_count"; static constexpr char HasKernelEntry[] = "has_kernel_entry"; static constexpr char HasTraceRayModule[] = "has_trace_ray_module"; @@ -80,6 +81,7 @@ Expected RayTracingLibrarySummary::decodeMsgpack(Strin getUInt(root[RtLibSummary::KnownUnsetRayFlags], rls.knownUnsetRayFlags); getUInt(root[RtLibSummary::MaxRayPayloadSize], rls.maxRayPayloadSize); getUInt(root[RtLibSummary::MaxHitAttributeSize], rls.maxHitAttributeSize); + getUInt(root[RtLibSummary::MaxUsedPayloadRegisterCount], rls.maxUsedPayloadRegisterCount); getBool(root[RtLibSummary::HasKernelEntry], rls.hasKernelEntry); getBool(root[RtLibSummary::HasTraceRayModule], rls.hasTraceRayModule); @@ -98,6 +100,7 @@ std::string RayTracingLibrarySummary::encodeMsgpack() const { root[RtLibSummary::KnownUnsetRayFlags] = knownUnsetRayFlags; root[RtLibSummary::MaxRayPayloadSize] = maxRayPayloadSize; root[RtLibSummary::MaxHitAttributeSize] = maxHitAttributeSize; + root[RtLibSummary::MaxUsedPayloadRegisterCount] = maxUsedPayloadRegisterCount; 
root[RtLibSummary::HasKernelEntry] = hasKernelEntry; root[RtLibSummary::HasTraceRayModule] = hasTraceRayModule; @@ -114,6 +117,7 @@ void RayTracingLibrarySummary::merge(const RayTracingLibrarySummary &other) { } maxRayPayloadSize = std::max(maxRayPayloadSize, other.maxRayPayloadSize); maxHitAttributeSize = std::max(maxHitAttributeSize, other.maxHitAttributeSize); + maxUsedPayloadRegisterCount = std::max(maxUsedPayloadRegisterCount, other.maxUsedPayloadRegisterCount); // TODO: Inherit kernel entry and trace ray module if possible and avoid recompile? hasKernelEntry = false; diff --git a/lgc/state/ShaderStage.cpp b/lgc/state/ShaderStage.cpp index 20a733c79c..73c34b9f0b 100644 --- a/lgc/state/ShaderStage.cpp +++ b/lgc/state/ShaderStage.cpp @@ -155,6 +155,7 @@ Function *lgc::addFunctionArgs(Function *oldFunc, Type *retTy, ArrayRef newFunc->takeName(oldFunc); newFunc->setSubprogram(oldFunc->getSubprogram()); newFunc->setDLLStorageClass(oldFunc->getDLLStorageClass()); + newFunc->copyMetadata(oldFunc, 0); // Always insert the new function after the old function oldFunc->getParent()->getFunctionList().insertAfter(oldFunc->getIterator(), newFunc); diff --git a/lgc/state/TargetInfo.cpp b/lgc/state/TargetInfo.cpp index 098b919764..c2b2d32604 100644 --- a/lgc/state/TargetInfo.cpp +++ b/lgc/state/TargetInfo.cpp @@ -69,7 +69,6 @@ static void setGfx10BaseInfo(TargetInfo *targetInfo) { targetInfo->getGpuProperty().gsOnChipDefaultLdsSizePerSubgroup = 8192; targetInfo->getGpuProperty().ldsSizePerThreadGroup = 16384; - targetInfo->getGpuProperty().ldsSizeDwordGranularityShift = 7; targetInfo->getGpuProperty().maxSgprsAvailable = 102; targetInfo->getGpuProperty().supportsDpp = true; @@ -126,6 +125,7 @@ static void setGfx1010Info(TargetInfo *targetInfo) { targetInfo->getGpuWorkarounds().gfx10.waFixBadImageDescriptor = 1; } +#if LLPC_BUILD_NAVI12 // gfx1011 // // @param [in/out] targetInfo : Target info @@ -151,6 +151,7 @@ static void setGfx1011Info(TargetInfo *targetInfo) { 
targetInfo->getGpuProperty().supportIntegerDotFlag.compBitwidth4 = true; targetInfo->getGpuProperty().supportIntegerDotFlag.sameSignedness = true; } +#endif // gfx1012 // @@ -234,6 +235,7 @@ static void setGfx1034Info(TargetInfo *targetInfo) { targetInfo->getGpuProperty().numShaderEngines = 1; } +#if LLPC_BUILD_REMBRANDT // gfx1035 // // @param [in/out] targetInfo : Target info @@ -244,7 +246,9 @@ static void setGfx1035Info(TargetInfo *targetInfo) { targetInfo->getGpuProperty().numShaderEngines = 1; targetInfo->getGpuWorkarounds().gfx10.waClearWriteCompressBit = 1; } +#endif +#if LLPC_BUILD_RAPHAEL || LLPC_BUILD_MENDOCINO // gfx1036 // // @param [in/out] targetInfo : Target info @@ -254,6 +258,7 @@ static void setGfx1036Info(TargetInfo *targetInfo) { targetInfo->getGpuProperty().numShaderEngines = 1; } +#endif // gfx11 // @@ -306,7 +311,7 @@ static void setGfx1102Info(TargetInfo *targetInfo) { targetInfo->getGpuProperty().numShaderEngines = 2; } -#if LLPC_BUILD_PHOENIX1 +#if LLPC_BUILD_PHOENIX1 || LLPC_BUILD_PHOENIX2 // gfx1103 // // @param [in/out] targetInfo : Target info @@ -331,20 +336,26 @@ bool TargetInfo::setTargetInfo(StringRef gpuName) { static const GpuNameStringMap GpuNameMap[] = { {"gfx1010", &setGfx1010Info}, // gfx1010 +#if LLPC_BUILD_NAVI12 {"gfx1011", &setGfx1011Info}, // gfx1011, navi12 +#endif {"gfx1012", &setGfx1012Info}, // gfx1012, navi14 {"gfx1030", &setGfx1030Info}, // gfx1030, navi21 {"gfx1031", &setGfx1031Info}, // gfx1031, navi22 {"gfx1032", &setGfx1032Info}, // gfx1032, navi23 {"gfx1034", &setGfx1034Info}, // gfx1034, navi24 +#if LLPC_BUILD_REMBRANDT {"gfx1035", &setGfx1035Info}, // gfx1035, rembrandt +#endif +#if LLPC_BUILD_RAPHAEL || LLPC_BUILD_MENDOCINO {"gfx1036", &setGfx1036Info}, // gfx1036, raphael | mendocino +#endif {"gfx1100", &setGfx1100Info}, // gfx1100, navi31 #if LLPC_BUILD_NAVI32 {"gfx1101", &setGfx1101Info}, // gfx1101, navi32 #endif {"gfx1102", &setGfx1102Info}, // gfx1102, navi33 -#if LLPC_BUILD_PHOENIX1 +#if 
LLPC_BUILD_PHOENIX1 || LLPC_BUILD_PHOENIX2 {"gfx1103", &setGfx1103Info}, // gfx1103, phoenix1 #endif }; diff --git a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc index e0faa36e92..d93e64a2c0 100644 --- a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc +++ b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc @@ -1,23 +1,11 @@ -; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature ; RUN: lgc -mcpu=gfx1030 -o - -passes='require,lgc-patch-entry-point-mutate' %s | FileCheck --check-prefixes=CHECK %s declare void @lgc.cps.jump(i32 %target, i32 %levels, {i32} %state, ...) noreturn define void @test({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { -entry: - %v = extractvalue {i32} %state, 0 - %table.0 = getelementptr i32, ptr %table, i32 0 - %cr.then = load i32, ptr %table.0 - %then.arg = add i32 %arg, 1 - %v.then = mul i32 %v, 2 - %state.then = insertvalue {i32} poison, i32 %v.then, 0 - call void (i32, i32, { i32 }, ...) 
@lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) - unreachable -} - -!0 = !{i32 1} ; level 1 ; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META2:![0-9]+]] !lgc.shaderstage [[META3:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -91,3 +79,16 @@ entry: ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32s(ptr inreg [[TMP30]], i32 inreg [[TMP26]], <20 x i32> inreg [[TMP57]], { <3 x i32>, i32, ptr addrspace(5), i32 } [[TMP16]], i32 0) ; CHECK-NEXT: unreachable ; +entry: + %v = extractvalue {i32} %state, 0 + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0 + %then.arg = add i32 %arg, 1 + %v.then = mul i32 %v, 2 + %state.then = insertvalue {i32} poison, i32 %v.then, 0 + call void (i32, i32, { i32 }, ...) @lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) + unreachable +} + +!0 = !{i32 1} ; level 1 +; diff --git a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc index 7a2bc5bef7..4764fa42b5 100644 --- a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature ; RUN: lgc -mcpu=gfx1030 -o - -passes='require,lgc-patch-entry-point-mutate' %s | FileCheck --check-prefixes=CHECK %s declare void @lgc.cps.jump(i32, i32, { i32 }, ...) 
#0 @@ -8,47 +8,8 @@ declare void @lgc.cps.set.vsp(ptr addrspace(32)) #1 declare ptr addrspace(32) @lgc.cps.get.vsp() #2 define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc.shaderstage !3 { -.entry: - %desc = call <4 x i32> @lgc.load.user.data__v4i32(i32 0) - %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc) - %p0 = getelementptr i32, ptr addrspace(7) %ptr, i32 0 - %i_vsp = load i32, ptr addrspace(7) %p0, align 4 - %vsp = inttoptr i32 %i_vsp to ptr addrspace(32) - call void @lgc.cps.set.vsp(ptr addrspace(32) %vsp) - - %p1 = getelementptr i32, ptr addrspace(7) %ptr, i32 1 - %cr = load i32, ptr addrspace(7) %p1, align 4 - - %p2 = getelementptr i32, ptr addrspace(7) %ptr, i32 2 - %arg = load i32, ptr addrspace(7) %p2, align 4 - - %state = insertvalue { i32 } poison, i32 %arg, 0 - - %p32 = call ptr addrspace(32) @lgc.cps.get.vsp() - - call void (i32, i32, { i32 }, ...) @lgc.cps.jump(i32 %cr, i32 1, {i32} %state, i32 %arg, ptr addrspace(32) %p32) - unreachable -} - -declare <4 x i32> @lgc.load.user.data__v4i32(i32) #4 - -declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #5 - -attributes #0 = { nounwind } -attributes #1 = { nounwind willreturn memory(inaccessiblemem: write) } -attributes #2 = { nounwind willreturn memory(inaccessiblemem: read) } -attributes #4 = { nounwind memory(none) } -attributes #5 = { nounwind willreturn memory(none) } - -!lgc.user.data.nodes = !{!1} -!llpc.compute.mode = !{!2} - -!1 = !{!"DescriptorBuffer", i32 6, i32 6, i32 0, i32 4, i64 0, i32 0, i32 4} -!2 = !{i32 8, i32 4, i32 1, i32 0, i32 0, i32 1} -!3 = !{i32 7} - ; CHECK-LABEL: define {{[^@]+}}@lgc.shader.CS.main -; CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], 
i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR3:[0-9]+]] !lgc.shaderstage !4 { +; CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR3:[0-9]+]] !lgc.shaderstage [[META4:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -123,3 +84,43 @@ attributes #5 = { nounwind willreturn memory(none) } ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32p5i32i32s(ptr inreg [[TMP30]], i32 inreg [[TMP26]], <20 x i32> inreg [[TMP57]], { i32, ptr addrspace(5), i32, i32 } [[TMP19]], i32 0) ; CHECK-NEXT: unreachable ; +.entry: + %desc = call <4 x i32> @lgc.load.user.data__v4i32(i32 0) + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc) + %p0 = getelementptr i32, ptr addrspace(7) %ptr, i32 0 + %i_vsp = load i32, ptr addrspace(7) %p0, align 4 + %vsp = inttoptr i32 %i_vsp to ptr addrspace(32) + call void @lgc.cps.set.vsp(ptr addrspace(32) %vsp) + + %p1 = getelementptr i32, ptr addrspace(7) %ptr, i32 1 + %cr = load i32, ptr addrspace(7) %p1, align 4 + + %p2 = getelementptr i32, ptr addrspace(7) %ptr, i32 2 + %arg = load i32, ptr addrspace(7) %p2, align 4 + + %state = insertvalue { i32 } poison, i32 %arg, 0 + + %p32 = call ptr addrspace(32) @lgc.cps.get.vsp() + + call void (i32, i32, { i32 }, ...) @lgc.cps.jump(i32 %cr, i32 1, {i32} %state, i32 %arg, ptr addrspace(32) %p32) + unreachable +} + +declare <4 x i32> @lgc.load.user.data__v4i32(i32) #4 + +declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #5 + +attributes #0 = { nounwind } +attributes #1 = { nounwind willreturn memory(inaccessiblemem: write) } +attributes #2 = { nounwind willreturn memory(inaccessiblemem: read) } +attributes #4 = { nounwind memory(none) } +attributes #5 = { nounwind willreturn memory(none) } + +!lgc.user.data.nodes = !{!1} +!llpc.compute.mode = !{!2} + +!1 = !{!"DescriptorBuffer", i32 6, i32 6, i32 0, i32 4, i64 0, i32 0, i32 4} +!2 = !{i32 8, i32 4, i32 1, i32 0, i32 0, i32 1} +!3 = !{i32 7} + +; diff --git a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc index 11e6485e85..d70431730b 100644 --- a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc @@ -1,150 +1,12 @@ -; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py 
UTC_ARGS: --tool lgc --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1030 -o - -passes='require,lgc-patch-entry-point-mutate' %s | FileCheck --check-prefixes=CHECK %s %_rgen_1.Frame = type { ptr addrspace(7), ptr addrspace(7), i32 } ; Function Attrs: alwaysinline nounwind -define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continufy.stage !17 !continuation !18 !lgc.cps !17 { -.entry: - %0 = call ptr addrspace(32) @lgc.cps.alloc(i32 96) - %1 = call i64 @llvm.amdgcn.s.getpc() - %2 = bitcast i64 %1 to <2 x i32> - %3 = call i64 @llvm.amdgcn.s.getpc() - %4 = bitcast i64 %3 to <2 x i32> - %5 = call i64 @llvm.amdgcn.s.getpc() - %6 = bitcast i64 %5 to <2 x i32> - %7 = call i32 @lgc.load.user.data__i32(i32 20) - %8 = insertelement <2 x i32> %6, i32 %7, i64 0 - %9 = bitcast <2 x i32> %8 to i64 - %10 = inttoptr i64 %9 to ptr addrspace(4) - %11 = getelementptr i8, ptr addrspace(4) %10, i32 0 - %12 = load <2 x i32>, ptr addrspace(4) %11, align 8 - %13 = extractelement <2 x i32> %12, i64 0 - %14 = extractelement <2 x i32> %12, i64 1 - %15 = insertelement <4 x i32> poison, i32 %13, i64 0 - %16 = and i32 %14, 65535 - %17 = insertelement <4 x i32> %15, i32 %16, i64 1 - %18 = insertelement <4 x i32> %17, i32 -1, i64 2 - %19 = insertelement <4 x i32> %18, i32 553734060, i64 3 - %20 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %19) - %21 = call i32 @lgc.load.user.data__i32(i32 0) - %22 = insertelement <2 x i32> %4, i32 %21, i64 0 - %23 = bitcast <2 x i32> %22 to i64 - %24 = inttoptr i64 %23 to ptr addrspace(4) - %25 = getelementptr i8, ptr addrspace(4) %24, i32 32 - %26 = load <4 x i32>, ptr addrspace(4) %25, align 16 - %27 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %26) - %28 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 0 - store ptr addrspace(7) %27, ptr addrspace(32) %28, 
align 32 - %29 = call i32 @lgc.load.user.data__i32(i32 0) - %30 = insertelement <2 x i32> %2, i32 %29, i64 0 - %31 = bitcast <2 x i32> %30 to i64 - %32 = inttoptr i64 %31 to ptr addrspace(4) - %33 = getelementptr i8, ptr addrspace(4) %32, i32 48 - %34 = load <4 x i32>, ptr addrspace(4) %33, align 16 - %35 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %34) - %36 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 1 - store ptr addrspace(7) %35, ptr addrspace(32) %36, align 32 - %37 = load volatile i32, ptr addrspace(7) %35, align 4 - %38 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 2 - store i32 %37, ptr addrspace(32) %38, align 4 - %39 = add i32 %37, -37 - %40 = getelementptr inbounds i8, ptr addrspace(7) %20, i32 52 - %41 = load i64, ptr addrspace(7) %40, align 8 - %42 = getelementptr inbounds i8, ptr addrspace(7) %20, i32 60 - %43 = load i32, ptr addrspace(7) %42, align 4 - %44 = mul i32 %39, %43 - %45 = inttoptr i64 %41 to ptr addrspace(1) - %46 = sext i32 %44 to i64 - %47 = getelementptr i8, ptr addrspace(1) %45, i64 %46 - %48 = load i64, ptr addrspace(1) %47, align 8 - %49 = inttoptr i64 %48 to ptr - %50 = ptrtoint ptr %49 to i32 - %51 = or i32 %50, 1 - %52 = inttoptr i32 %51 to ptr - %53 = call i32 (...) @lgc.cps.as.continuation.reference(ptr @_rgen_1.resume.0) - call void (...) 
@lgc.cps.jump(i32 %51, i32 2, {} poison, i32 %53, [1 x i32] undef, i32 %39) - unreachable -} - -define void @_rgen_1.resume.0({} %0, i32 %1, [1 x i32] %2) !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continufy.stage !17 !continuation !18 !lgc.cps !17 { -entryresume.0: - %3 = call ptr addrspace(32) @lgc.cps.peek(i32 96) - %4 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 2 - %.reload6 = load i32, ptr addrspace(32) %4, align 4 - %5 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 1 - %.reload3 = load ptr addrspace(7), ptr addrspace(32) %5, align 32 - %6 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 0 - %.reload = load ptr addrspace(7), ptr addrspace(32) %6, align 32 - %dummy.udata = call i32 @lgc.load.user.data__i32(i32 20) - %dummy.gep = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 %dummy.udata, i32 0 - %dummy.reload = load ptr addrspace(7), ptr addrspace(32) %dummy.gep, align 32 - %7 = load volatile i32, ptr addrspace(7) %.reload3, align 4 - %8 = icmp eq i32 %.reload6, %7 - %9 = zext i1 %8 to i32 - store i32 %9, ptr addrspace(7) %.reload, align 4 - ret void -} - -; Function Attrs: nounwind willreturn memory(none) -declare i32 @lgc.load.user.data__i32(i32) #1 - -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i64 @llvm.amdgcn.s.getpc() #2 - -; Function Attrs: nounwind willreturn memory(none) -declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1 - -; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) -declare ptr addrspace(32) @lgc.cps.alloc(i32) #6 - -; Function Attrs: nounwind willreturn -declare i32 @lgc.cps.as.continuation.reference(...) #3 - -; Function Attrs: noreturn -declare void @lgc.cps.jump(...) 
#5 - -; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) -declare ptr addrspace(32) @lgc.cps.peek(i32) #7 - -attributes #0 = { alwaysinline nounwind "target-features"=",+wavefrontsize32" } -attributes #1 = { nounwind willreturn memory(none) } -attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -attributes #3 = { nounwind willreturn } -attributes #4 = { nounwind } -attributes #5 = { noreturn } -attributes #6 = { nounwind willreturn memory(inaccessiblemem: readwrite) } -attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } - -!llpc.compute.mode = !{!0} -!lgc.client = !{!1} -!lgc.options = !{!2} -!lgc.options.CS = !{!3} -!lgc.user.data.nodes = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13} -!amdgpu.pal.metadata.msgpack = !{!14} - -!0 = !{i32 8, i32 4, i32 1} -!1 = !{!"Vulkan"} -!2 = !{i32 262875531, i32 502344192, i32 854861601, i32 -1595331954, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16777216, i32 0, i32 0, i32 2} -!3 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 192, i32 0, i32 0, i32 32, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} -!4 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 0, i32 1, i32 4} -!5 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i64 0, i32 0, i32 4} -!6 = !{!"DescriptorBuffer", i32 6, i32 0, i32 4, i32 4, i64 0, i32 1, i32 4} -!7 = !{!"DescriptorBuffer", i32 6, i32 0, i32 8, i32 4, i64 0, i32 2, i32 4} -!8 = !{!"DescriptorBuffer", i32 6, i32 0, i32 12, i32 4, i64 0, i32 3, i32 4} -!9 = !{!"StreamOutTableVaPtr", i32 11, i32 0, i32 1, i32 1, i32 0} -!10 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 5, i32 1, i32 3} -!11 = !{!"DescriptorBufferCompact", i32 10, i32 0, i32 0, i32 2, i64 93, i32 17, i32 2} -!12 = !{!"DescriptorBuffer", i32 6, i32 0, i32 
2, i32 4, i64 93, i32 0, i32 4} -!13 = !{!"DescriptorBuffer", i32 6, i32 0, i32 6, i32 4, i64 93, i32 1, i32 4} -!14 = !{!"\82\B0amdpal.pipelines\91\83\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\C4jyX\05\E6M\0F\CF\03b\DD\05\C5\B6\DB\B9\AD.llpc_version\A467.0\AEamdpal.version\92\03\00"} -!15 = !{i32 5313} -!16 = !{i32 7} -!17 = !{i32 0} -!18 = !{ptr @_rgen_1} - -; CHECK-LABEL: define {{[^@]+}}@_rgen_1 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[USERDATA0:%.*]], i32 inreg [[USERDATA1:%.*]], i32 inreg [[USERDATA2:%.*]], i32 inreg [[USERDATA3:%.*]], i32 inreg [[USERDATA4:%.*]], i32 inreg [[USERDATA5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[RCR:%.*]]) #[[ATTR0:[0-9]+]] align 64 !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continufy.stage !17 !continuation !18 !lgc.cps !17 { +define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continuation !18 !lgc.cps !17 { +; CHECK-LABEL: define amdgpu_cs_chain void @_rgen_1( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[USERDATA0:%.*]], i32 inreg [[USERDATA1:%.*]], i32 inreg [[USERDATA2:%.*]], i32 inreg [[USERDATA3:%.*]], i32 inreg [[USERDATA4:%.*]], i32 inreg [[USERDATA5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) 
[[VSP:%.*]], i32 [[RCR:%.*]]) #[[ATTR0:[0-9]+]] align 64 !spirv.ExecutionModel !15 !lgc.shaderstage [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -270,9 +132,71 @@ attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32i32s(ptr inreg [[TMP86]], i32 inreg [[TMP82]], <20 x i32> inreg [[TMP113]], { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP72]], i32 0) ; CHECK-NEXT: unreachable ; -; -; CHECK-LABEL: define {{[^@]+}}@_rgen_1.resume.0 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[USERDATA0:%.*]], i32 inreg [[USERDATA1:%.*]], i32 inreg [[USERDATA2:%.*]], i32 inreg [[USERDATA3:%.*]], i32 inreg [[USERDATA4:%.*]], i32 inreg [[USERDATA5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[TMP0:%.*]], [1 x i32] [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] align 64 !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continufy.stage !17 !continuation !18 !lgc.cps !17 { +.entry: + %0 = call ptr addrspace(32) @lgc.cps.alloc(i32 96) + %1 = call i64 @llvm.amdgcn.s.getpc() + %2 = bitcast i64 %1 to <2 x i32> + %3 = call i64 @llvm.amdgcn.s.getpc() + %4 = bitcast i64 %3 to <2 x i32> + %5 = call i64 @llvm.amdgcn.s.getpc() + %6 = bitcast i64 %5 to <2 x i32> + %7 = call i32 @lgc.load.user.data__i32(i32 20) + %8 = insertelement <2 x i32> %6, i32 %7, i64 0 + %9 = bitcast <2 x i32> %8 to 
i64 + %10 = inttoptr i64 %9 to ptr addrspace(4) + %11 = getelementptr i8, ptr addrspace(4) %10, i32 0 + %12 = load <2 x i32>, ptr addrspace(4) %11, align 8 + %13 = extractelement <2 x i32> %12, i64 0 + %14 = extractelement <2 x i32> %12, i64 1 + %15 = insertelement <4 x i32> poison, i32 %13, i64 0 + %16 = and i32 %14, 65535 + %17 = insertelement <4 x i32> %15, i32 %16, i64 1 + %18 = insertelement <4 x i32> %17, i32 -1, i64 2 + %19 = insertelement <4 x i32> %18, i32 553734060, i64 3 + %20 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %19) + %21 = call i32 @lgc.load.user.data__i32(i32 0) + %22 = insertelement <2 x i32> %4, i32 %21, i64 0 + %23 = bitcast <2 x i32> %22 to i64 + %24 = inttoptr i64 %23 to ptr addrspace(4) + %25 = getelementptr i8, ptr addrspace(4) %24, i32 32 + %26 = load <4 x i32>, ptr addrspace(4) %25, align 16 + %27 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %26) + %28 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 0 + store ptr addrspace(7) %27, ptr addrspace(32) %28, align 32 + %29 = call i32 @lgc.load.user.data__i32(i32 0) + %30 = insertelement <2 x i32> %2, i32 %29, i64 0 + %31 = bitcast <2 x i32> %30 to i64 + %32 = inttoptr i64 %31 to ptr addrspace(4) + %33 = getelementptr i8, ptr addrspace(4) %32, i32 48 + %34 = load <4 x i32>, ptr addrspace(4) %33, align 16 + %35 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %34) + %36 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 1 + store ptr addrspace(7) %35, ptr addrspace(32) %36, align 32 + %37 = load volatile i32, ptr addrspace(7) %35, align 4 + %38 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %0, i32 0, i32 2 + store i32 %37, ptr addrspace(32) %38, align 4 + %39 = add i32 %37, -37 + %40 = getelementptr inbounds i8, ptr addrspace(7) %20, i32 52 + %41 = load i64, ptr addrspace(7) %40, align 8 + %42 = getelementptr inbounds i8, ptr addrspace(7) %20, i32 60 + %43 = load i32, ptr addrspace(7) %42, align 
4 + %44 = mul i32 %39, %43 + %45 = inttoptr i64 %41 to ptr addrspace(1) + %46 = sext i32 %44 to i64 + %47 = getelementptr i8, ptr addrspace(1) %45, i64 %46 + %48 = load i64, ptr addrspace(1) %47, align 8 + %49 = inttoptr i64 %48 to ptr + %50 = ptrtoint ptr %49 to i32 + %51 = or i32 %50, 1 + %52 = inttoptr i32 %51 to ptr + %53 = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @_rgen_1.resume.0) + call void (...) @lgc.cps.jump(i32 %51, i32 2, {} poison, i32 %53, [1 x i32] undef, i32 %39) + unreachable +} + +define void @_rgen_1.resume.0({} %0, i32 %1, [1 x i32] %2) !spirv.ExecutionModel !15 !lgc.shaderstage !16 !continuation !18 !lgc.cps !17 { +; CHECK-LABEL: define amdgpu_cs_chain void @_rgen_1.resume.0( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[USERDATA0:%.*]], i32 inreg [[USERDATA1:%.*]], i32 inreg [[USERDATA2:%.*]], i32 inreg [[USERDATA3:%.*]], i32 inreg [[USERDATA4:%.*]], i32 inreg [[USERDATA5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[TMP0:%.*]], [1 x i32] [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] align 64 !spirv.ExecutionModel !15 !lgc.shaderstage [[META16]] !continuation [[META17]] !lgc.cps [[META18]] { ; CHECK-NEXT: entryresume.0: ; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -360,3 +284,86 @@ attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } ; CHECK: ret.block: ; CHECK-NEXT: ret void ; +entryresume.0: + %3 = call ptr addrspace(32) @lgc.cps.peek(i32 96) + %4 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 2 + %.reload6 = load i32, ptr addrspace(32) %4, align 4 + %5 = 
getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 1 + %.reload3 = load ptr addrspace(7), ptr addrspace(32) %5, align 32 + %6 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 0, i32 0 + %.reload = load ptr addrspace(7), ptr addrspace(32) %6, align 32 + %dummy.udata = call i32 @lgc.load.user.data__i32(i32 20) + %dummy.gep = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %3, i32 %dummy.udata, i32 0 + %dummy.reload = load ptr addrspace(7), ptr addrspace(32) %dummy.gep, align 32 + %7 = load volatile i32, ptr addrspace(7) %.reload3, align 4 + %8 = icmp eq i32 %.reload6, %7 + %9 = zext i1 %8 to i32 + store i32 %9, ptr addrspace(7) %.reload, align 4 + ret void +} + +; Function Attrs: nounwind willreturn memory(none) +declare i32 @lgc.load.user.data__i32(i32) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.amdgcn.s.getpc() #2 + +; Function Attrs: nounwind willreturn memory(none) +declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) +declare ptr addrspace(32) @lgc.cps.alloc(i32) #6 + +; Function Attrs: nounwind willreturn +declare i32 @lgc.cps.as.continuation.reference__i32(...) #3 + +; Function Attrs: noreturn +declare void @lgc.cps.jump(...) 
#5 + +; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) +declare ptr addrspace(32) @lgc.cps.peek(i32) #7 + +attributes #0 = { alwaysinline nounwind "target-features"=",+wavefrontsize32" } +attributes #1 = { nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind willreturn } +attributes #4 = { nounwind } +attributes #5 = { noreturn } +attributes #6 = { nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) } + +!llpc.compute.mode = !{!0} +!lgc.client = !{!1} +!lgc.options = !{!2} +!lgc.options.CS = !{!3} +!lgc.user.data.nodes = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13} +!amdgpu.pal.metadata.msgpack = !{!14} + +!0 = !{i32 8, i32 4, i32 1} +!1 = !{!"Vulkan"} +!2 = !{i32 262875531, i32 502344192, i32 854861601, i32 -1595331954, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16777216, i32 0, i32 0, i32 2} +!3 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 192, i32 0, i32 0, i32 32, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} +!4 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 0, i32 1, i32 4} +!5 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i64 0, i32 0, i32 4} +!6 = !{!"DescriptorBuffer", i32 6, i32 0, i32 4, i32 4, i64 0, i32 1, i32 4} +!7 = !{!"DescriptorBuffer", i32 6, i32 0, i32 8, i32 4, i64 0, i32 2, i32 4} +!8 = !{!"DescriptorBuffer", i32 6, i32 0, i32 12, i32 4, i64 0, i32 3, i32 4} +!9 = !{!"StreamOutTableVaPtr", i32 11, i32 0, i32 1, i32 1, i32 0} +!10 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 5, i32 1, i32 3} +!11 = !{!"DescriptorBufferCompact", i32 10, i32 0, i32 0, i32 2, i64 93, i32 17, i32 2} +!12 = !{!"DescriptorBuffer", i32 6, i32 0, i32 
2, i32 4, i64 93, i32 0, i32 4} +!13 = !{!"DescriptorBuffer", i32 6, i32 0, i32 6, i32 4, i64 93, i32 1, i32 4} +!14 = !{!"\82\B0amdpal.pipelines\91\83\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\C4jyX\05\E6M\0F\CF\03b\DD\05\C5\B6\DB\B9\AD.llpc_version\A467.0\AEamdpal.version\92\03\00"} +!15 = !{i32 5313} +!16 = !{i32 7} +!17 = !{i32 0} +!18 = !{ptr @_rgen_1} + +; +; +; +;. +; CHECK: [[META16]] = !{i32 7} +; CHECK: [[META17]] = !{ptr @_rgen_1} +; CHECK: [[META18]] = !{i32 0} +;. diff --git a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc index 89db5c7b5f..118d06b073 100644 --- a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc @@ -1,10 +1,11 @@ -; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --function-signature --check-globals +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature --check-globals ; RUN: lgc -mcpu=gfx1030 -o - -passes='require,lgc-patch-entry-point-mutate' %s | FileCheck --check-prefixes=CHECK %s declare void @lgc.cps.jump(...) 
noreturn declare ptr addrspace(32) @lgc.cps.alloc(i32) declare void @lgc.cps.free(i32) -declare i32 @lgc.cps.as.continuation.reference(ptr) +declare i32 @lgc.cps.as.continuation.reference__i32(ptr) +declare i64 @lgc.cps.as.continuation.reference__i64(ptr) declare ptr addrspace(32) @lgc.cps.peek(i32) declare ptr addrspace(32) @lgc.cps.get.vsp() declare i32 @lgc.cps.get.dummy.index(i32) @@ -12,92 +13,8 @@ declare i32 @lgc.cps.get.dummy.index(i32) %_rgen_1.Frame = type { ptr addrspace(5), ptr addrspace(5), i32 } define void @test.0({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { - %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering - - store i32 333, ptr addrspace(32) %mem - - %p1 = getelementptr i32, ptr addrspace(32) %mem, i32 1 - store i32 111, ptr addrspace(32) %p1 - - %p2 = getelementptr i8, ptr addrspace(32) %mem, i32 9 - store i8 99, ptr addrspace(32) %p2 - - %q1 = ptrtoint ptr addrspace(32) %p1 to i32 - - %state = insertvalue { ptr addrspace(32) } poison, ptr addrspace(32) %p2, 0 - - %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) - call void (...) @lgc.cps.jump(i32 %cr, i32 2, { ptr addrspace(32) } %state, ptr addrspace(32) %p2, i32 %q1) - unreachable -} - -define void @test.1({} %no_state, ptr addrspace(32) %p2, i32 %q1) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { - %p1 = inttoptr i32 %q1 to ptr addrspace(32) - %n111 = load i32, ptr addrspace(32) %p1 - %n99 = load i8, ptr addrspace(32) %p2 - - %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.2) - call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, {} poison) - unreachable -} - -define void @test.2({ ptr addrspace(32) } %state) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { - %mem = call ptr addrspace(32) @lgc.cps.peek(i32 10) ; round up to 12 during lowering - %p2 = extractvalue { ptr addrspace(32) } %state, 0 - - %n333 = load i32, ptr addrspace(32) %mem - %n99 = load i8, ptr addrspace(32) %p2 - - call void @lgc.cps.free(i32 10) ; round up to 12 during lowering - ret void -} - -; Dummy test to show behavior with lowering of non-constant GEP indices. -define void @test.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { - %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering - - %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) - %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 - %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() - %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 - store i32 %vsp.i, ptr addrspace(32) %1 - - %stack.el1 = call i32 @lgc.cps.get.dummy.index(i32 1) - %2 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el1 - %vsp.2 = call ptr addrspace(32) @lgc.cps.peek(i32 4) - %vsp.2.i = ptrtoint ptr addrspace(32) %vsp.2 to i32 - store i32 %vsp.2.i, ptr addrspace(32) %2 - - %stack.el2 = call i32 @lgc.cps.get.dummy.index(i32 2) - %stack.el2.div = sdiv i32 %stack.el2, 2 - %3 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el2.div, i32 1 - %vsp.3 = call ptr addrspace(32) @lgc.cps.peek(i32 8) - %vsp.3.i = ptrtoint ptr addrspace(32) %vsp.3 to i32 - store i32 %vsp.3.i, ptr addrspace(32) %3 - - %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) - call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison, ptr addrspace(32) %vsp.3, i32 %vsp.3.i) - unreachable -} - -; Dummy test to show behavior with lowering of nested GEPs. 
-define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { - %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering - - %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) - %gep.base = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 - %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %gep.base, i32 0, i32 2 - %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() - %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 - store i32 %vsp.i, ptr addrspace(32) %1 - - %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) - call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison, ptr addrspace(32) %vsp, i32 %vsp.i) - unreachable -} - ; CHECK-LABEL: define {{[^@]+}}@test.0 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]]) 
#[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META2:![0-9]+]] !lgc.shaderstage [[META3:![0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> @@ -178,9 +95,28 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP41]], i32 inreg [[TMP37]], <20 x i32> inreg [[TMP68]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP27]], i32 0) ; CHECK-NEXT: unreachable ; -; + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering + + store i32 333, ptr addrspace(32) %mem + + %p1 = getelementptr i32, ptr addrspace(32) %mem, i32 1 + store i32 111, ptr addrspace(32) %p1 + + %p2 = getelementptr i8, ptr addrspace(32) %mem, i32 9 + store i8 99, ptr addrspace(32) %p2 + + %q1 = ptrtoint ptr addrspace(32) %p1 to i32 + + %state = insertvalue { ptr addrspace(32) } poison, ptr addrspace(32) %p2, 0 + + %cr = call i32 @lgc.cps.as.continuation.reference__i32(ptr @test.1) + call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, { ptr addrspace(32) } %state, ptr addrspace(32) %p2, i32 %q1) + unreachable +} + +define void @test.1({} %no_state, ptr addrspace(32) %p2, i32 %q1) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-LABEL: define {{[^@]+}}@test.1 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] !lgc.shaderstage [[META3]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> @@ -248,9 +184,18 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP32]], i32 inreg [[TMP28]], <20 x i32> inreg [[TMP59]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], i32 0) ; CHECK-NEXT: unreachable ; -; + %p1 = inttoptr i32 %q1 to ptr addrspace(32) + %n111 = load i32, ptr addrspace(32) %p1 + %n99 = load i8, ptr addrspace(32) %p2 + + %cr = call i32 @lgc.cps.as.continuation.reference__i32(ptr @test.2) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison) + unreachable +} + +define void @test.2({ ptr addrspace(32) } %state) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-LABEL: define {{[^@]+}}@test.2 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]]) #[[ATTR1]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] !lgc.shaderstage [[META3]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 
@llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> @@ -329,9 +274,20 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK: ret.block: ; CHECK-NEXT: ret void ; -; + %mem = call ptr addrspace(32) @lgc.cps.peek(i32 10) ; round up to 12 during lowering + %p2 = extractvalue { ptr addrspace(32) } %state, 0 + + %n333 = load i32, ptr addrspace(32) %mem + %n99 = load i8, ptr addrspace(32) %p2 + + call void @lgc.cps.free(i32 10) ; round up to 12 during lowering + ret void +} + +; Dummy test to show behavior with lowering of non-constant GEP indices. +define void @test.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-LABEL: define {{[^@]+}}@test.gep -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP1:%.*]]) #[[ATTR1]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] 
!lgc.shaderstage [[META3]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> @@ -423,9 +379,36 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP50]], i32 inreg [[TMP46]], <20 x i32> inreg [[TMP77]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP36]], i32 0) ; CHECK-NEXT: unreachable ; -; + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering + + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 + %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() + %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 + store i32 %vsp.i, ptr addrspace(32) %1 + + %stack.el1 = call i32 @lgc.cps.get.dummy.index(i32 1) + %2 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el1 + %vsp.2 = call ptr addrspace(32) @lgc.cps.peek(i32 4) + %vsp.2.i = ptrtoint ptr addrspace(32) %vsp.2 to i32 + store i32 %vsp.2.i, ptr addrspace(32) %2 + + %stack.el2 = call i32 @lgc.cps.get.dummy.index(i32 2) + %stack.el2.div = sdiv i32 %stack.el2, 2 + %3 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el2.div, i32 1 + %vsp.3 = call ptr addrspace(32) @lgc.cps.peek(i32 8) + %vsp.3.i = ptrtoint ptr addrspace(32) %vsp.3 to i32 + store i32 %vsp.3.i, ptr addrspace(32) %3 + + %cr = call i32 @lgc.cps.as.continuation.reference__i32(ptr @test.1) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison, ptr addrspace(32) %vsp.3, i32 %vsp.3.i) + unreachable +} + +; Dummy test to show behavior with lowering of nested GEPs. 
+define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-LABEL: define {{[^@]+}}@test.nested.gep -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP1:%.*]]) #[[ATTR1]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] !lgc.shaderstage [[META3]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> @@ -499,6 +482,112 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[MULTIDISPATCHINFO]], i64 19 ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP38]], i32 inreg [[TMP34]], <20 x i32> inreg [[TMP65]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], i32 0) ; CHECK-NEXT: unreachable +; + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) ; round up to 12 during lowering + + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %gep.base = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 + %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %gep.base, i32 0, i32 2 + %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() + %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 + store i32 %vsp.i, ptr addrspace(32) %1 + + %cr = call i32 @lgc.cps.as.continuation.reference__i32(ptr @test.1) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison, ptr addrspace(32) %vsp, i32 %vsp.i) + unreachable +} + +define void @test.i64.reference({} %no_state, ptr addrspace(32) %p2, i32 %q1) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { +; CHECK-LABEL: define {{[^@]+}}@test.i64.reference +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] !lgc.shaderstage [[META3]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[SPILLTABLE]], 
i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(5) [[VSP]] to i32 +; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[Q1]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP10]], i32 0 +; CHECK-NEXT: [[N111:%.*]] = load i32, ptr addrspace(5) [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i32 [[P2]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP12]], i32 0 +; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP13]], align 1 +; CHECK-NEXT: [[CR:%.*]] = trunc i64 add (i64 ptrtoint (ptr @test.2 to i64), i64 1) to i32 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(5) +; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP16]], i32 [[CR]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP17]], ptr addrspace(5) [[TMP15]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP19]], i32 [[VCR]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP22]], i1 true) +; CHECK-NEXT: [[TMP24:%.*]] = call i32 
@llvm.amdgcn.readlane(i32 [[TMP20]], i32 [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]]) +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP26]]) +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP27]], -64 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP29]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i32> [[TMP30]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i64 [[TMP33]] to <2 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP34]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <20 x i32> [[TMP40]], i32 [[TMP35]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <20 x i32> [[TMP41]], i32 [[TMP36]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <20 x i32> [[TMP42]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> [[TMP43]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> 
[[TMP48]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP37]], i64 16 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP38]], i64 17 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP39]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5s(ptr inreg [[TMP32]], i32 inreg [[TMP28]], <20 x i32> inreg [[TMP59]], { <3 x i32>, i32, ptr addrspace(5) } [[TMP18]], i32 0) +; CHECK-NEXT: unreachable +; + %p1 = inttoptr i32 %q1 to ptr addrspace(32) + %n111 = load i32, ptr addrspace(32) %p1 + %n99 = load i8, ptr addrspace(32) %p2 + + %cr64 = call i64 @lgc.cps.as.continuation.reference__i64(ptr @test.2) + %cr = trunc i64 %cr64 to i32 + call void (...) @lgc.cps.jump(i32 %cr, i32 2, {} poison) + unreachable +} + +; +; +; +; +; +; +; +; +; +; ; ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } @@ -509,7 +598,7 @@ define void @test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent noreturn nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = !{!""} -; CHECK: [[META1:![0-9]+]] = !{!"\82\B0amdpal.pipelines\91\83\B1.shader_functions\85\A6test.0\81\B4.frontend_stack_size\10\A6test.1\81\B4.frontend_stack_size\00\A6test.2\81\B4.frontend_stack_size\00\A8test.gep\81\B4.frontend_stack_size\0C\AFtest.nested.gep\81\B4.frontend_stack_size\0C\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AEamdpal.version\92\03\00"} -; CHECK: [[META2:![0-9]+]] = !{i32 1} -; CHECK: [[META3:![0-9]+]] = !{i32 7} +; CHECK: [[META1:![0-9]+]] = !{!"\82\B0amdpal.pipelines\91\83\B1.shader_functions\86\A6test.0\81\B4.frontend_stack_size\10\A6test.1\81\B4.frontend_stack_size\00\A6test.2\81\B4.frontend_stack_size\00\A8test.gep\81\B4.frontend_stack_size\0C\B2test.i64.reference\81\B4.frontend_stack_size\00\AFtest.nested.gep\81\B4.frontend_stack_size\0C\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AEamdpal.version\92\03\00"} +; CHECK: [[META2]] = !{i32 1} +; CHECK: [[META3]] = !{i32 7} ;. diff --git a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc index e748f9cf55..403bba58f2 100644 --- a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc @@ -1,53 +1,11 @@ -; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature ; RUN: lgc -mcpu=gfx1030 -o - -passes='require,lgc-patch-entry-point-mutate' %s | FileCheck --check-prefixes=CHECK %s declare void @lgc.cps.jump(...) 
noreturn define void @unify_jumps({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { -entry: - %v = extractvalue {i32} %state, 0 - %cond = icmp ult i32 %v, 3 - br i1 %cond, label %then, label %else - -then: - %table.0 = getelementptr i32, ptr %table, i32 0 - %cr.then = load i32, ptr %table.0 - %then.arg = add i32 %arg, 1 - %v.then = mul i32 %v, 2 - %state.then = insertvalue {i32} poison, i32 %v.then, 0 - call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) - unreachable - -else: - %table.1 = getelementptr i32, ptr %table, i32 1 - %cr.else = load i32, ptr %table.1 - %else.arg = uitofp i32 %arg to float - call void (...) @lgc.cps.jump(i32 %cr.else, i32 2, {} poison, float %else.arg, i32 5) - unreachable -} - -define void @unify_jump_ret({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { -entry: - %v = extractvalue {i32} %state, 0 - %cond = icmp ult i32 %v, 3 - br i1 %cond, label %then, label %else - -then: - %table.0 = getelementptr i32, ptr %table, i32 0 - %cr.then = load i32, ptr %table.0 - %then.arg = add i32 %arg, 1 - %v.then = mul i32 %v, 2 - %state.then = insertvalue {i32} poison, i32 %v.then, 0 - call void (...) 
@lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) - unreachable - -else: - ret void -} - -!0 = !{i32 1} ; level 1 ; CHECK-LABEL: define {{[^@]+}}@unify_jumps -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META2:![0-9]+]] !lgc.shaderstage [[META3:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -137,9 +95,31 @@ else: ; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32s(ptr inreg [[TMP38]], i32 inreg [[TMP34]], <20 x i32> inreg [[TMP65]], { <3 x i32>, i32, ptr addrspace(5), i32, i32 } [[TMP24]], i32 0) ; CHECK-NEXT: unreachable ; -; +entry: + %v = extractvalue {i32} %state, 0 + %cond = icmp ult i32 %v, 3 + br i1 %cond, label %then, label %else + +then: + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0 + %then.arg = add i32 %arg, 1 + %v.then = mul i32 %v, 2 + %state.then = insertvalue {i32} poison, i32 %v.then, 0 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) + unreachable + +else: + %table.1 = getelementptr i32, ptr %table, i32 1 + %cr.else = load i32, ptr %table.1 + %else.arg = uitofp i32 %arg to float + call void (...) @lgc.cps.jump(i32 %cr.else, i32 2, {} poison, float %else.arg, i32 5) + unreachable +} + +define void @unify_jump_ret({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { ; CHECK-LABEL: define {{[^@]+}}@unify_jump_ret -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1]] align 64 !lgc.cps !2 !lgc.shaderstage !3 { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg 
[[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], ptr addrspace(5) [[VSP:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META2]] !lgc.shaderstage [[META3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -226,3 +206,25 @@ else: ; CHECK: ret.block: ; CHECK-NEXT: ret void ; +entry: + %v = extractvalue {i32} %state, 0 + %cond = icmp ult i32 %v, 3 + br i1 %cond, label %then, label %else + +then: + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0 + %then.arg = add i32 %arg, 1 + %v.then = mul i32 %v, 2 + %state.then = insertvalue {i32} poison, i32 %v.then, 0 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, {i32} %state.then, i32 %then.arg) + unreachable + +else: + ret void +} + +!0 = !{i32 1} ; level 1 +; +; +; diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 1a156b2c6b..16120f7005 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -132,6 +132,7 @@ if(ICD_BUILD_LLPC) endforeach() endif() target_link_libraries(llpcinternal PUBLIC ${extra_llvm_libs}) + target_link_libraries(llpcinternal PRIVATE gfxruntime) endif() ### Compiler Options ################################################################################################### @@ -204,6 +205,7 @@ if(ICD_BUILD_LLPC) context/llpcGraphicsContext.cpp context/llpcPipelineContext.cpp context/llpcRayTracingContext.cpp + context/GfxRuntimeContext.cpp ) # llpc/lower @@ -217,7 +219,7 @@ if(ICD_BUILD_LLPC) lower/llpcSpirvLowerMath.cpp lower/llpcSpirvLowerMemoryOp.cpp lower/llpcSpirvLowerRayQuery.cpp - lower/llpcSpirvLowerRayQueryPostInline.cpp + lower/LowerPostInline.cpp lower/llpcSpirvLowerRayTracing.cpp lower/llpcSpirvLowerTerminator.cpp 
lower/llpcSpirvLowerTranslator.cpp @@ -227,6 +229,8 @@ if(ICD_BUILD_LLPC) lower/LowerGLCompatibility.cpp lower/llpcSpirvLowerCooperativeMatrix.cpp lower/PrepareContinuations.cpp + lower/LowerAdvancedBlend.cpp + lower/ProcessGfxRuntimeLibrary.cpp ) # llpc/translator diff --git a/llpc/context/GfxRuntimeContext.cpp b/llpc/context/GfxRuntimeContext.cpp new file mode 100644 index 0000000000..d7932edf6b --- /dev/null +++ b/llpc/context/GfxRuntimeContext.cpp @@ -0,0 +1,40 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GfxRuntimeContext.cpp + * @brief LLVMContext extension that stores a GfxRuntime library module + *********************************************************************************************************************** + */ + +#include "GfxRuntimeContext.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace lgc; + +GfxRuntimeContext::Key GfxRuntimeContext::theKey; + +GfxRuntimeContext::~GfxRuntimeContext() = default; diff --git a/llpc/context/GfxRuntimeContext.h b/llpc/context/GfxRuntimeContext.h new file mode 100644 index 0000000000..83639d10d1 --- /dev/null +++ b/llpc/context/GfxRuntimeContext.h @@ -0,0 +1,57 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GfxRuntimeContext.h + * @brief LLVMContext extension that stores a GfxRuntime library module + *********************************************************************************************************************** + */ +#pragma once + +#include "llvm-dialects/Dialect/ContextExtension.h" +#include + +namespace llvm { +class Module; +} + +namespace lgc { + +// This extension can be attached to an LLVMContext and queried via the +// GfxRuntimeContext::get method inherited from the base class. +// +// Compiler drivers (like LLPC) are expected to set theModule to the GfxRuntime +// library, so that advanced blend pass can cross-module inline +// functions implemented there. 
+class GfxRuntimeContext : public llvm_dialects::ContextExtensionImpl { +public: + explicit GfxRuntimeContext(llvm::LLVMContext &) {} + ~GfxRuntimeContext(); + + static Key theKey; + std::unique_ptr theModule; +}; + +} // namespace lgc diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index a18669c8c7..cc8207067f 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -30,6 +30,7 @@ */ #include "llpcCompiler.h" #include "LLVMSPIRVLib.h" +#include "LowerAdvancedBlend.h" #include "PrepareContinuations.h" #include "SPIRVEntry.h" #include "SPIRVFunction.h" @@ -60,6 +61,7 @@ #include "lgc/Builder.h" #include "lgc/ElfLinker.h" #include "lgc/EnumIterator.h" +#include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "lgc/PassManager.h" #include "llvm-dialects/Dialect/Dialect.h" @@ -80,6 +82,7 @@ // New version of the code (also handles unknown version, which we treat as latest) #include "llvm/IRPrinter/IRPrintingPasses.h" #endif +#include "llvmraytracing/Continuations.h" #include "llvmraytracing/GpurtContext.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/CommandLine.h" @@ -368,6 +371,7 @@ class LlpcDiagnosticHandler : public DiagnosticHandler { // @param cache : Pointer to ICache implemented in client Result VKAPI_CALL ICompiler::Create(GfxIpVersion gfxIp, unsigned optionCount, const char *const *options, ICompiler **ppCompiler, ICache *cache) { + assert(gfxIp.major >= 10); // Only accept GFx10+ Result result = Result::Success; const char *client = options[0]; @@ -436,7 +440,7 @@ bool VKAPI_CALL ICompiler::IsVertexFormatSupported(VkFormat format) { // @param cache : Pointer to ICache implemented in client Compiler::Compiler(GfxIpVersion gfxIp, unsigned optionCount, const char *const *options, MetroHash::Hash optionHash, ICache *cache) - : m_optionHash(optionHash), m_gfxIp(gfxIp), m_cache(cache), m_relocatablePipelineCompilations(0) { + : m_gfxIp(gfxIp), m_cache(cache), 
m_relocatablePipelineCompilations(0) { for (unsigned i = 0; i < optionCount; ++i) m_options.push_back(options[i]); @@ -867,6 +871,103 @@ static unsigned getSamplerArraySizeInSpvStruct(const SPIRVType *spvStruct) { return samplerArraySize; } +// ===================================================================================================================== +// Get shader module usage information by analyzing instructions +// +// @param module : Spriv module binary +// @param func : Spriv function +// @param [out] texelFetchImageIds: image Ids which are called by texelFetch +// @param [out] shaderModuleUsage: usage info of a shader module +static void getShaderModuleUsageFromInst(SPIRVModule *module, SPIRVFunction *func, + std::set &texelFetchImageIds, ShaderModuleUsage &shaderModuleUsage) { + std::map instLoadVars; // pair of OpLoad instruction + std::map instImageVars; // pair of OpImage instruction + // Store variable ids for each OpAccessChain, avoid to handle these variables in OpStore again + std::set aggregateVarIds; + + // Lambda to set the state of legacy built-in + auto setLegacyBuiltInsUsageInfo = [&](unsigned location) { + if (location == static_cast(ClipVertex)) + shaderModuleUsage.useClipVertex = true; + if (location == static_cast(FrontColor)) + shaderModuleUsage.useFrontColor = true; + if (location == static_cast(BackColor)) + shaderModuleUsage.useBackColor = true; + if (location == static_cast(FrontSecondaryColor)) + shaderModuleUsage.useFrontSecondaryColor = true; + if (location == static_cast(BackSecondaryColor)) + shaderModuleUsage.useBackSecondaryColor = true; + }; + + for (auto j = 0; j < func->getNumBasicBlock(); ++j) { + auto *bb = func->getBasicBlock(j); + size_t instNum = bb->getNumInst(); + for (auto k = 0; k < instNum; ++k) { + auto *inst = bb->getInst(k); + spv::Op opCode = inst->getOpCode(); + if (opCode != spv::OpStore && opCode != spv::OpAccessChain && opCode != spv::OpLoad && opCode != spv::OpImage && + opCode != 
spv::OpImageFetch) + continue; + + std::vector ops; + std::vector ids; + if (opCode != spv::OpLoad) { + ops = inst->getOperands(); + ids = inst->getIds(ops); + } + + if (opCode == spv::OpStore && aggregateVarIds.find(ids[1]) == aggregateVarIds.end()) { + SPIRVType *varType = ops[1]->getValueType(ids[1]); + auto var = static_cast(module->getValue(ids[1])); + // The builtin outputs are global variables + if (varType->getPointerStorageClass() == StorageClassOutput) { + SPIRVWord location = SPIRVID_INVALID; + if (var->hasDecorate(DecorationLocation, 0, &location)) { + setLegacyBuiltInsUsageInfo(location); + } + } + } else if (opCode == spv::OpAccessChain) { + aggregateVarIds.insert(inst->getId()); + + SPIRVType *varType = ops[0]->getValueType(ids[0]); + SPIRVType *elemTy = varType->getPointerElementType(); + uint64_t index = static_cast(ops[1])->getZExtIntValue(); + if (varType->getPointerStorageClass() == StorageClassOutput) { + // The builtin outputs are structure members (glsl version 150 and above) + if (elemTy->isTypeStruct()) { + SPIRVWord builtIn = false; + SPIRVWord location = SPIRVID_INVALID; + auto *memberType = elemTy->getStructMemberType(index); + if (elemTy->hasMemberDecorate(index, DecorationBuiltIn, 0, &builtIn)) { + if (builtIn == spv::BuiltInClipDistance) + shaderModuleUsage.clipDistanceArraySize = memberType->getArrayLength(); + } else if (elemTy->hasMemberDecorate(index, DecorationLocation, 0, &location)) { + setLegacyBuiltInsUsageInfo(location); + } + } else if (elemTy->isTypeArray()) { + // gl_ClipDistance[] are global variables (glsl version 130 and 140) + auto var = static_cast(module->getValue(ids[0])); + SPIRVWord builtIn = 0; + if (var->hasDecorate(DecorationBuiltIn, 0, &builtIn) && builtIn == spv::BuiltInClipDistance) + shaderModuleUsage.clipDistanceArraySize = elemTy->getArrayLength(); + } + } + } else if (opCode == spv::OpLoad) { + SPIRVLoad *load = static_cast(inst); + instLoadVars[load->getId()] = load->getSrc()->getId(); + } else if 
(opCode == spv::OpImage) { + auto iter = instLoadVars.find(ids[0]); + if (iter != instLoadVars.end()) + instImageVars[inst->getId()] = iter->second; + } else if (opCode == spv::OpImageFetch) { + auto iter = instImageVars.find(ids[0]); + if (iter != instImageVars.end()) + texelFetchImageIds.insert(iter->second); + } + } + } +} + // ===================================================================================================================== // Parse the spirv binary to build the resource node data for buffers and opaque types, the resource node data will be // returned to client driver together with other info of ShaderModuleUsage @@ -895,12 +996,14 @@ void Compiler::buildShaderModuleResourceUsage( spirvStream >> *module; ShaderStage shaderStage = shaderInfo->entryStage; + std::set texelFetchImageIds; // Find the entry target. SPIRVEntryPoint *entryPoint = nullptr; SPIRVFunction *func = nullptr; for (unsigned i = 0, funcCount = module->getNumFunctions(); i < funcCount; ++i) { func = module->getFunction(i); + getShaderModuleUsageFromInst(module.get(), func, texelFetchImageIds, shaderModuleUsage); entryPoint = module->getEntryPoint(func->getId()); if (entryPoint && entryPoint->getExecModel() == convertToExecModel(shaderStage) && entryPoint->getName() == shaderInfo->pEntryTarget) @@ -991,6 +1094,7 @@ void Compiler::buildShaderModuleResourceUsage( SPIRVTypeSampledImage *sampledImageType = static_cast(varElemTy); SPIRVTypeImage *imageType = sampledImageType->getImageType(); textureSymbol.isTexelBuffer = imageType->getDescriptor().Dim == spv::DimBuffer; + textureSymbol.isTexelFetchUsed = (texelFetchImageIds.find(var->getId()) != texelFetchImageIds.end()); textureSymbolInfo.push_back(textureSymbol); } } else { @@ -1054,6 +1158,18 @@ void Compiler::dumpCompilerOptions(void *pipelineDumpFile) { PipelineDumper::DumpPipelineExtraInfo(reinterpret_cast(pipelineDumpFile), &extraInfo); } +// 
===================================================================================================================== +// Helper function for dumping fragment outputs +// +// @param pipelineDumpFile : Handle of pipeline dump file +// @param data : fragment output buffer +// @param size : buffer size +void Compiler::dumpFragmentOutputs(void *pipelineDumpFile, const uint8_t *data, unsigned size) { + if (!pipelineDumpFile) + return; + PipelineDumper::DumpFragmentOutputs(reinterpret_cast(pipelineDumpFile), data, size); +} + // ===================================================================================================================== // Build unlinked shader to ElfPackage with pipeline info. // @@ -1153,8 +1269,6 @@ Result Compiler::buildGraphicsShaderStage(const GraphicsPipelineBuildInfo *pipel it = pipeNode.find(Vkgc::DiscardState); if (it != pipeNode.end()) discardState = it->second.getBool(); - } else { - report_fatal_error("Cannot emit llvm IR or bitcode with color export shader enabled"); } } @@ -1269,6 +1383,7 @@ Result Compiler::BuildColorExportShader(const GraphicsPipelineBuildInfo *pipelin if (exports.empty()) return Result::Success; + dumpFragmentOutputs(pipelineDumpFile, static_cast(fsOutputMetaData), metaDatatSize); dumpCompilerOptions(pipelineDumpFile); bool hasError = false; context->setDiagnosticHandler(std::make_unique(&hasError)); @@ -1445,8 +1560,11 @@ Result Compiler::buildUnlinkedShaderInternal(Context *context, ArrayRef(shaderInfo[ShaderStageFragment]->pModuleData); - if (moduleData->usage.useGenericBuiltIn || moduleData->usage.useBarycentric) + if (moduleData->usage.useGenericBuiltIn || moduleData->usage.useBarycentric) { + // TODO: We have added semantic to support generic builtIn, however, there seems to be some errors. We need to + // add more info to sync inputs and outputs. 
return Result::RequireFullPipeline; + } } else if (stage == UnlinkedStageVertexProcess) { bool hasVs = shaderInfo[ShaderStageVertex]->pModuleData != nullptr; bool hasTes = (shaderInfo[ShaderStageTessControl]->pModuleData != nullptr) || @@ -1679,6 +1797,23 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefpModuleData) { + pipelineInfo = + static_cast(context->getPipelineContext()->getPipelineBuildInfo()); + enableAdvancedBlend = pipelineInfo->advancedBlendInfo.enableAdvancedBlend; + } + if (enableAdvancedBlend) + context->ensureGfxRuntimeLibrary(); + for (unsigned shaderIndex = 0; shaderIndex < shaderInfo.size() && result == Result::Success; ++shaderIndex) { const PipelineShaderInfo *shaderInfoEntry = shaderInfo[shaderIndex]; if (!shaderInfoEntry || !shaderInfoEntry->pModuleData) @@ -1739,6 +1874,16 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefaddPass(LowerAdvancedBlend(pipelineInfo->advancedBlendInfo.binding)); + if (EnableOuts()) { + lowerPassMgr->addPass(PrintModulePass( + outs(), "\n" + "===============================================================================\n" + "// LLPC Advanced Blend Pass results\n")); + } + } + if (moduleData->usage.isInternalRtShader) setUseGpurt(&*pipeline); @@ -1810,6 +1955,7 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefusage.enableRayQuery; flag.isInternalRtShader = moduleData->usage.isInternalRtShader; + flag.usesAdvancedBlend = enableAdvancedBlend; SpirvLower::addPasses(context, entryStage, *lowerPassMgr, timerProfiler.getTimer(TimerLower), flag); // Run the passes. bool success = runPasses(&*lowerPassMgr, modules[shaderIndex].get()); @@ -2559,6 +2705,10 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe } } + // For continuations mode, it must be indirect mode. 
+ if (rayTracingContext.isContinuationsMode()) + rayTracingContext.setIndirectPipeline(); + // Add entry module PipelineShaderInfo raygenMainShaderInfo = pipelineInfo->pShaders[0]; raygenMainShaderInfo.entryStage = ShaderStageRayTracingRayGen; @@ -2672,7 +2822,7 @@ Result Compiler::buildRayTracingPipelineElf(Context *context, std::unique_ptr &shaderProps, std::vector &moduleCallsTraceRay, unsigned moduleIndex, std::unique_ptr &pipeline, TimerProfiler &timerProfiler) { - + auto rtContext = static_cast(context->getPipelineContext()); if (moduleIndex > 0) { auto &shaderProp = shaderProps[moduleIndex - 1]; const StringRef &funcName = module->getName(); @@ -2681,7 +2831,15 @@ Result Compiler::buildRayTracingPipelineElf(Context *context, std::unique_ptrisContinuationsMode()) { + if (auto stage = tryGetLgcRtShaderStageFromName(funcName)) { + auto cpsLevel = cps::getCpsLevelForShaderStage(stage.value()); + shaderIdExtraBits |= static_cast(cpsLevel); + } + } + shaderProp.shaderIdExtraBits = shaderIdExtraBits; } auto options = pipeline->getOptions(); @@ -2692,15 +2850,34 @@ Result Compiler::buildRayTracingPipelineElf(Context *context, std::unique_ptr(context->getPipelineContext())->getIndirectStageMask() == 0) + if (rtContext->getIndirectStageMask() == 0) { options.rtIndirectMode = lgc::RayTracingIndirectMode::NotIndirect; + } else if (rtContext->isContinuationsMode()) { + // For continuations mode, we need to run LowerRaytracingPipelinePass here first separately because we need to + // collect metadata added by the pass + std::unique_ptr passMgr(lgc::PassManager::Create(context->getLgcContext())); + passMgr->registerModuleAnalysis([&] { return DialectContextAnalysis(false); }); + passMgr->addPass(LowerRaytracingPipelinePass()); + bool success = runPasses(&*passMgr, module.get()); + assert(success); + (void(success)); // unused + + auto maxUsedPayloadRegisterCount = ContHelper::tryGetMaxUsedPayloadRegisterCount(*module).value_or(0); + { + // Library summary in rtContext 
could be shared between threads, need to ensure it is only modified by one thread + // at a time. + std::lock_guard lock(getHelperThreadMutex()); + rtContext->getRayTracingLibrarySummary().maxUsedPayloadRegisterCount = + std::max(rtContext->getRayTracingLibrarySummary().maxUsedPayloadRegisterCount, maxUsedPayloadRegisterCount); + } + } pipeline->setOptions(options); generatePipeline(context, moduleIndex, std::move(module), pipelineElf, pipeline.get(), timerProfiler); if (moduleIndex > 0) - addRayTracingIndirectPipelineMetadata(&pipelineElf); + adjustRayTracingElf(&pipelineElf, rtContext, shaderProps[moduleIndex - 1]); return Result::Success; } @@ -3081,6 +3258,15 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, pipelineElfs.resize(newModules.size()); shaderProps.resize(newModules.size() - 1); + // Take entry module, it will be handled at last. + entry = std::move(newModules[0]); + + std::unique_ptr traversalModule; + if (indirectStageMask != 0) { + traversalModule = std::move(newModules.back()); + newModules.pop_back(); + } + InternalHelperThreadProvider ourHelperThreadProvider; if (cl::AddRtHelpers && !helperThreadProvider) helperThreadProvider = &ourHelperThreadProvider; @@ -3095,6 +3281,11 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, helperThreadProvider->SetTasks(&helperThreadBuildRayTracingPipelineElf, newModules.size(), static_cast(&helperThreadPayload)); + unsigned moduleIndex = 0; + // Initial increase to skip entry module, it will be handled later. 
+ helperThreadProvider->GetNextTask(&moduleIndex); + helperThreadProvider->TaskCompleted(); + std::vector workers(cl::AddRtHelpers); for (std::thread &worker : workers) { worker = std::thread([&helperThreadProvider, &helperThreadPayload] { @@ -3102,8 +3293,6 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, }); } - unsigned moduleIndex = 0; - while (!helperThreadPayload.helperThreadJoined && helperThreadProvider->GetNextTask(&moduleIndex)) { // NOTE: When a helper thread joins, it will move modules from the original context into a new one. However, // main thread may be processing on the original context at the same time, results in out of sync situation. @@ -3135,6 +3324,10 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, } else { for (auto [moduleIndex, module] : llvm::enumerate(newModules)) { + // Skip entry module here, it will be handled later. + if (moduleIndex == 0) + continue; + Result result = buildRayTracingPipelineElf(mainContext, std::move(module), pipelineElfs[moduleIndex], shaderProps, moduleCallsTraceRay, moduleIndex, pipeline, timerProfiler); if (result != Result::Success) @@ -3142,15 +3335,36 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, } } + // Build traversal at last after we gather all needed information. + if (traversalModule) { + if (rtContext.isContinuationsMode()) + ContHelper::setPreservedPayloadRegisterCount(*traversalModule, + rtContext.getRayTracingLibrarySummary().maxUsedPayloadRegisterCount); + + Result result = + buildRayTracingPipelineElf(mainContext, std::move(traversalModule), pipelineElfs[newModules.size()], + shaderProps, moduleCallsTraceRay, newModules.size(), pipeline, timerProfiler); + if (result != Result::Success) + return result; + } + + // Build entry module at very last. 
+ Result result = buildRayTracingPipelineElf(mainContext, std::move(entry), pipelineElfs[0], shaderProps, + moduleCallsTraceRay, 0, pipeline, timerProfiler); + if (result != Result::Success) + return result; + return hasError ? Result::ErrorInvalidShader : Result::Success; } // ===================================================================================================================== -// Add raytracing pipeline indirect pipeline metadata -// The metadata is needed for RGP to correctly show different subtype of shaders. +// Adjust raytracing pipeline ELF package // -// @param [in/out] pipelineElfs : The pipeline ELF -void Compiler::addRayTracingIndirectPipelineMetadata(ElfPackage *pipelineElf) { +// @param [in/out] pipelineElf : The pipeline ELF +// @param [in] rtContext : The ray tracing context +// @param [in/out] shaderProp : The shader property +void Compiler::adjustRayTracingElf(ElfPackage *pipelineElf, RayTracingContext *rtContext, + RayTracingShaderProperty &shaderProp) { // Read the ELF package ElfWriter writer(m_gfxIp); Result result = writer.ReadFromBuffer(pipelineElf->data(), pipelineElf->size()); @@ -3169,43 +3383,47 @@ void Compiler::addRayTracingIndirectPipelineMetadata(ElfPackage *pipelineElf) { (void(success)); // unused // Get the shader_functions section - auto pipeline = document.getRoot().getMap(true)[PalAbi::CodeObjectMetadataKey::Pipelines].getArray(true)[0]; - auto shaderFunctionSection = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::ShaderFunctions].getMap(true); + auto &pipeline = document.getRoot().getMap(true)[PalAbi::CodeObjectMetadataKey::Pipelines].getArray(true)[0]; + auto &shaderFunctionSection = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::ShaderFunctions].getMap(true); // Get the shader function auto shaderFunctionName = shaderFunctionSection.begin()->first.getString(); - auto shaderFunction = shaderFunctionSection.begin()->second.getMap(true); + auto &shaderFunction = 
shaderFunctionSection.begin()->second.getMap(true); + // 1. Add raytracing pipeline indirect pipeline metadata + // The metadata is needed for RGP to correctly show different subtype of shaders. // Determine the shader subtype by name auto subtype = "Unknown"; - if (shaderFunctionName.contains("rgen")) - subtype = "RayGeneration"; - else if (shaderFunctionName.contains("miss")) - subtype = "Miss"; - else if (shaderFunctionName.contains("ahit")) - subtype = "AnyHit"; - else if (shaderFunctionName.contains("chit")) - subtype = "ClosestHit"; - else if (shaderFunctionName.contains("sect")) - subtype = "Intersection"; - else if (shaderFunctionName.contains("call")) - subtype = "Callable"; - else if (shaderFunctionName.contains("cs")) - subtype = "Traversal"; - + if (auto shaderStage = tryGetLgcRtShaderStageFromName(shaderFunctionName)) { + auto stage = shaderStage.value(); + if (stage == lgc::rt::RayTracingShaderStage::RayGeneration) + subtype = "RayGeneration"; + else if (stage == lgc::rt::RayTracingShaderStage::Miss) + subtype = "Miss"; + else if (stage == lgc::rt::RayTracingShaderStage::AnyHit) + subtype = "AnyHit"; + else if (stage == lgc::rt::RayTracingShaderStage::ClosestHit) + subtype = "ClosestHit"; + else if (stage == lgc::rt::RayTracingShaderStage::Intersection) + subtype = "Intersection"; + else if (stage == lgc::rt::RayTracingShaderStage::Callable) + subtype = "Callable"; + else if (stage == lgc::rt::RayTracingShaderStage::Traversal) + subtype = "Traversal"; + } shaderFunction[".shader_subtype"] = subtype; - // Apply the .internal_pipeline_hash to .api_shader_hash in .shader_functions section + // 2. 
Apply the .internal_pipeline_hash to .api_shader_hash in .shader_functions section // NOTE: this is needed for RGP to recognize different shader subtype auto pipelineHash = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::InternalPipelineHash].getArray(true); shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[0] = pipelineHash[0]; shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[1] = pipelineHash[1]; - // Write to the pipeline ELF + // Write modified metadata to the pipeline ELF ElfNote newMetaNote = metaNote; std::string destBlob; document.writeToBlob(destBlob); - auto newData = new uint8_t[destBlob.size()]; // 4 is for additional alignment space + auto newData = new uint8_t[destBlob.size()]; memcpy(newData, destBlob.data(), destBlob.size()); newMetaNote.hdr.descSize = destBlob.size(); newMetaNote.data = newData; @@ -3400,9 +3618,10 @@ void Compiler::buildShaderCacheHash(Context *context, unsigned stageMask, ArrayR hasher.Update(stageHashes[getLgcShaderStage(stage)].data(), stageHashes[getLgcShaderStage(stage)].size()); // Update vertex input state - if (stage == ShaderStageVertex) + if (stage == ShaderStageVertex) { PipelineDumper::updateHashForVertexInputState(pipelineInfo->pVertexInput, pipelineInfo->dynamicVertexStride, &hasher); + } MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -3532,4 +3751,29 @@ unsigned getLgcShaderStageMask(ShaderStage stage) { return (1 << getLgcShaderStage(stage)); } +// ===================================================================================================================== +// Convert a name to middle-end LGC RT shader stage +// Returns std::nullopt if cannot determine +// +// @param name : The name to check +std::optional tryGetLgcRtShaderStageFromName(llvm::StringRef name) { + // TODO: We should eventually get rid of using name to determine shader stage + if (name.contains("rgen")) + return lgc::rt::RayTracingShaderStage::RayGeneration; + else if 
(name.contains("miss")) + return lgc::rt::RayTracingShaderStage::Miss; + else if (name.contains("ahit")) + return lgc::rt::RayTracingShaderStage::AnyHit; + else if (name.contains("chit")) + return lgc::rt::RayTracingShaderStage::ClosestHit; + else if (name.contains("sect")) + return lgc::rt::RayTracingShaderStage::Intersection; + else if (name.contains("call")) + return lgc::rt::RayTracingShaderStage::Callable; + else if (name.contains("cs")) + return lgc::rt::RayTracingShaderStage::Traversal; + + return std::nullopt; +} + } // namespace Llpc diff --git a/llpc/context/llpcCompiler.h b/llpc/context/llpcCompiler.h index 8c10ca3249..591b9e2e36 100644 --- a/llpc/context/llpcCompiler.h +++ b/llpc/context/llpcCompiler.h @@ -192,16 +192,17 @@ class Compiler : public ICompiler { std::vector &pipelineElfs, std::vector &shaderProps, IHelperThreadProvider *helperThreadProvider); - void addRayTracingIndirectPipelineMetadata(ElfPackage *pipelineElf); + void adjustRayTracingElf(ElfPackage *pipelineElf, RayTracingContext *rtContext, + Vkgc::RayTracingShaderProperty &shaderProp); Result buildUnlinkedShaderInternal(Context *context, llvm::ArrayRef shaderInfo, Vkgc::UnlinkedShaderStage stage, ElfPackage &elfPackage, llvm::MutableArrayRef stageCacheAccesses); void dumpCompilerOptions(void *pipelineDumpFile); + void dumpFragmentOutputs(void *pipelineDumpFile, const uint8_t *data, unsigned size); Result generatePipeline(Context *context, unsigned moduleIndex, std::unique_ptr module, ElfPackage &pipelineElf, lgc::Pipeline *pipeline, TimerProfiler &timerProfiler); std::vector m_options; // Compilation options - MetroHash::Hash m_optionHash; // Hash code of compilation options GfxIpVersion m_gfxIp; // Graphics IP version info Vkgc::ICache *m_cache; // Point to ICache implemented in client static unsigned m_instanceCount; // The count of compiler instance @@ -232,4 +233,8 @@ std::optional getLgcRtShaderStage(ShaderStage st // Convert front-end LLPC shader stage to middle-end LGC shader 
stage mask unsigned getLgcShaderStageMask(ShaderStage stage); +// Convert a name to middle-end LGC RT shader stage +// Returns std::nullopt if cannot determine +std::optional tryGetLgcRtShaderStageFromName(llvm::StringRef name); + } // namespace Llpc diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index 4d70b5205a..58b1d25c6b 100644 --- a/llpc/context/llpcContext.cpp +++ b/llpc/context/llpcContext.cpp @@ -29,7 +29,11 @@ *********************************************************************************************************************** */ #include "llpcContext.h" +#include "GfxRuntimeContext.h" +#include "LowerAdvancedBlend.h" +#include "ProcessGfxRuntimeLibrary.h" #include "SPIRVInternal.h" +#include "gfxruntime/GfxRuntimeLibrary.h" #include "llpcCompiler.h" #include "llpcDebug.h" #include "llpcPipelineContext.h" @@ -114,6 +118,7 @@ LgcContext *Context::getLgcContext() { m_builderContext.reset(LgcContext::create(&*m_targetMachine, *this, PAL_CLIENT_INTERFACE_MAJOR_VERSION)); lgc::GpurtContext::get(*this).theModule = nullptr; lgc::GpurtContext::get(*this).ownedTheModule.reset(); + lgc::GfxRuntimeContext::get(*this).theModule.reset(); // Pass the state of LLPC_OUTS on to LGC. LgcContext::setLlpcOuts(EnableOuts() ? &outs() : nullptr); @@ -207,6 +212,9 @@ void Context::setModuleTargetMachine(Module *module) { std::string dataLayoutStr = targetMachine->createDataLayout().getStringRepresentation(); // continuation stack address space. dataLayoutStr = dataLayoutStr + "-p" + std::to_string(cps::stackAddrSpace) + ":32:32"; + // SPIRV address spaces. 
+ dataLayoutStr = dataLayoutStr + "-p" + std::to_string(SPIRAS_Input) + ":32:32"; + dataLayoutStr = dataLayoutStr + "-p" + std::to_string(SPIRAS_Output) + ":32:32"; module->setDataLayout(dataLayoutStr); } @@ -277,4 +285,53 @@ void Context::ensureGpurtLibrary() { gpurtContext.theModule = gpurtContext.ownedTheModule.get(); } +// ===================================================================================================================== +// Ensure that a GfxRuntime library module is attached to this context via GfxRuntimeContext. +void Context::ensureGfxRuntimeLibrary() { + // Check whether we already have a GPURT library module that can be used + auto &gfxRuntimeContext = lgc::GfxRuntimeContext::get(*this); + + if (gfxRuntimeContext.theModule) + return; + + // Create the GfxRuntime library module + ShaderModuleData moduleData = {}; + std::tie(moduleData.binCode.codeSize, moduleData.binCode.pCode) = Vkgc::GetAdvancedBlendLibrary(); + moduleData.binType = BinaryType::Spirv; + moduleData.usage.keepUnusedFunctions = true; + + PipelineShaderInfo shaderInfo = {}; + shaderInfo.entryStage = ShaderStageCompute; + shaderInfo.pEntryTarget = nullptr; + shaderInfo.pModuleData = &moduleData; + + auto gfxRuntime = std::make_unique("gfxruntime", *this); + setModuleTargetMachine(gfxRuntime.get()); + + TimerProfiler timerProfiler(getPipelineHashCode(), "LLPC GfxRuntime", TimerProfiler::PipelineTimerEnableMask); + std::unique_ptr lowerPassMgr(lgc::PassManager::Create(getLgcContext())); + SpirvLower::registerTranslationPasses(*lowerPassMgr); + + timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, true); + + lowerPassMgr->addPass(SpirvLowerTranslator(ShaderStageCompute, &shaderInfo)); + if (EnableOuts()) { + lowerPassMgr->addPass( + PrintModulePass(outs(), "\n" + "===============================================================================\n" + "// LLPC SPIRV-to-LLVM translation results\n")); + } + + lowerPassMgr->addPass(SpirvLowerCfgMerges()); + 
lowerPassMgr->addPass(ProcessGfxRuntimeLibrary()); + lowerPassMgr->addPass(AlwaysInlinerPass()); + lowerPassMgr->addPass(SpirvLowerAccessChain()); + lowerPassMgr->addPass(SpirvLowerGlobal()); + timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, false); + + lowerPassMgr->run(*gfxRuntime); + + gfxRuntimeContext.theModule = std::move(gfxRuntime); +} + } // namespace Llpc diff --git a/llpc/context/llpcContext.h b/llpc/context/llpcContext.h index f8ac03cbe5..0bb2595e40 100644 --- a/llpc/context/llpcContext.h +++ b/llpc/context/llpcContext.h @@ -124,6 +124,8 @@ class Context : public llvm::LLVMContext { void ensureGpurtLibrary(); + void ensureGfxRuntimeLibrary(); + private: Context() = delete; Context(const Context &) = delete; diff --git a/llpc/context/llpcGraphicsContext.cpp b/llpc/context/llpcGraphicsContext.cpp index b371ce09f0..708c773c50 100644 --- a/llpc/context/llpcGraphicsContext.cpp +++ b/llpc/context/llpcGraphicsContext.cpp @@ -254,44 +254,41 @@ Options GraphicsContext::computePipelineOptions() const { options.enableColorExportShader = pipelineInfo->enableColorExportShader; options.useSoftwareVertexBufferDescriptors = pipelineInfo->useSoftwareVertexBufferDescriptors; options.vbAddressLowBitsKnown = pipelineInfo->getGlState().vbAddressLowBitsKnown; - if (getGfxIpVersion().major >= 10) { - // Only set NGG options for a GFX10+ graphics pipeline. - const auto &nggState = pipelineInfo->nggState; - if (!nggState.enableNgg && getGfxIpVersion().major < 11) // GFX11+ must enable NGG - options.nggFlags |= NggFlagDisable; - else { - options.nggFlags = (nggState.enableGsUse ? NggFlagEnableGsUse : 0) | - (nggState.forceCullingMode ? NggFlagForceCullingMode : 0) | - (nggState.compactVertex ? NggFlagCompactVertex : 0) | - (nggState.enableBackfaceCulling ? NggFlagEnableBackfaceCulling : 0) | - (nggState.enableFrustumCulling ? NggFlagEnableFrustumCulling : 0) | - (nggState.enableBoxFilterCulling ? 
NggFlagEnableBoxFilterCulling : 0) | - (nggState.enableSphereCulling ? NggFlagEnableSphereCulling : 0) | - (nggState.enableSmallPrimFilter ? NggFlagEnableSmallPrimFilter : 0) | - (nggState.enableCullDistanceCulling ? NggFlagEnableCullDistanceCulling : 0); - options.nggBackfaceExponent = nggState.backfaceExponent; - - // Use a static cast from Vkgc NggSubgroupSizingType to LGC NggSubgroupSizing, and static assert that - // that is valid. - static_assert(static_cast(NggSubgroupSizingType::Auto) == NggSubgroupSizing::Auto, "Mismatch"); - static_assert(static_cast(NggSubgroupSizingType::MaximumSize) == - NggSubgroupSizing::MaximumSize, - "Mismatch"); - static_assert(static_cast(NggSubgroupSizingType::HalfSize) == NggSubgroupSizing::HalfSize, - "Mismatch"); - static_assert(static_cast(NggSubgroupSizingType::OptimizeForVerts) == - NggSubgroupSizing::OptimizeForVerts, - "Mismatch"); - static_assert(static_cast(NggSubgroupSizingType::OptimizeForPrims) == - NggSubgroupSizing::OptimizeForPrims, - "Mismatch"); - static_assert(static_cast(NggSubgroupSizingType::Explicit) == NggSubgroupSizing::Explicit, - "Mismatch"); - options.nggSubgroupSizing = static_cast(nggState.subgroupSizing); - - options.nggVertsPerSubgroup = nggState.vertsPerSubgroup; - options.nggPrimsPerSubgroup = nggState.primsPerSubgroup; - } + // Only set NGG options for a GFX10+ graphics pipeline. + const auto &nggState = pipelineInfo->nggState; + if (!nggState.enableNgg && getGfxIpVersion().major < 11) // GFX11+ must enable NGG + options.nggFlags |= NggFlagDisable; + else { + options.nggFlags = (nggState.enableGsUse ? NggFlagEnableGsUse : 0) | + (nggState.forceCullingMode ? NggFlagForceCullingMode : 0) | + (nggState.compactVertex ? NggFlagCompactVertex : 0) | + (nggState.enableBackfaceCulling ? NggFlagEnableBackfaceCulling : 0) | + (nggState.enableFrustumCulling ? NggFlagEnableFrustumCulling : 0) | + (nggState.enableBoxFilterCulling ? NggFlagEnableBoxFilterCulling : 0) | + (nggState.enableSphereCulling ? 
NggFlagEnableSphereCulling : 0) | + (nggState.enableSmallPrimFilter ? NggFlagEnableSmallPrimFilter : 0) | + (nggState.enableCullDistanceCulling ? NggFlagEnableCullDistanceCulling : 0); + options.nggBackfaceExponent = nggState.backfaceExponent; + + // Use a static cast from Vkgc NggSubgroupSizingType to LGC NggSubgroupSizing, and static assert that + // that is valid. + static_assert(static_cast(NggSubgroupSizingType::Auto) == NggSubgroupSizing::Auto, "Mismatch"); + static_assert(static_cast(NggSubgroupSizingType::MaximumSize) == NggSubgroupSizing::MaximumSize, + "Mismatch"); + static_assert(static_cast(NggSubgroupSizingType::HalfSize) == NggSubgroupSizing::HalfSize, + "Mismatch"); + static_assert(static_cast(NggSubgroupSizingType::OptimizeForVerts) == + NggSubgroupSizing::OptimizeForVerts, + "Mismatch"); + static_assert(static_cast(NggSubgroupSizingType::OptimizeForPrims) == + NggSubgroupSizing::OptimizeForPrims, + "Mismatch"); + static_assert(static_cast(NggSubgroupSizingType::Explicit) == NggSubgroupSizing::Explicit, + "Mismatch"); + options.nggSubgroupSizing = static_cast(nggState.subgroupSizing); + + options.nggVertsPerSubgroup = nggState.vertsPerSubgroup; + options.nggPrimsPerSubgroup = nggState.primsPerSubgroup; } return options; } diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index c910569236..5349096c8e 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -726,7 +726,7 @@ uint64_t PipelineContext::getPipelineHashCode() const { // ===================================================================================================================== // Get wave size used for raytracing unsigned PipelineContext::getRayTracingWaveSize() const { - if ((m_gfxIp.major >= 10) && getPipelineType() != PipelineType::Graphics) + if (getPipelineType() != PipelineType::Graphics) return 32; return 64; } diff --git a/llpc/context/llpcRayTracingContext.h 
b/llpc/context/llpcRayTracingContext.h index d95f35eab0..c601a3a1bb 100644 --- a/llpc/context/llpcRayTracingContext.h +++ b/llpc/context/llpcRayTracingContext.h @@ -117,6 +117,7 @@ class RayTracingContext : public PipelineContext { unsigned hasLibraryStage(unsigned stageMask) { return m_pipelineInfo->pipelineLibStageMask & stageMask; } bool isReplay() { return m_pipelineInfo->isReplay; } Vkgc::LlpcRaytracingMode getRaytracingMode() { return m_pipelineInfo->mode; } + bool isContinuationsMode() { return getRaytracingMode() == Vkgc::LlpcRaytracingMode::Continuations; } unsigned getCpsFlag() { return m_pipelineInfo->cpsFlags; } protected: diff --git a/llpc/docs/amdllpc.md b/llpc/docs/amdllpc.md index fcc9c1e296..3ab778104c 100644 --- a/llpc/docs/amdllpc.md +++ b/llpc/docs/amdllpc.md @@ -5,14 +5,14 @@ LLPC can be built into a standalone offline compiler (amdllpc). It supports GLSL ## Build instructions LLPC is normally built as part of the [AMD Open Source Driver for Vulkan](https://github.com/GPUOpen-Drivers/AMDVLK/blob/dev/README.md). The build includes standalone `lgc` and `amdllpc` (**Note:** You need to add the option `-DXGL_BUILD_TOOLS=ON` in the AMDVLK cmake command before building `amdllpc`). -You can build `lgc amdllpc` only or build `check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-continuations check-continuations-units` to run local tests besides the build (**Note:** You need to add the option `-DXGL_BUILD_TESTS=ON` in the AMDVLK cmake command before building these local test targets). +You can build `lgc amdllpc` only or build `check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-llvmraytracing check-llvmraytracing-units` to run local tests besides the build (**Note:** You need to add the option `-DXGL_BUILD_TESTS=ON` in the AMDVLK cmake command before building these local test targets). 
``` cmake --build xgl/builds/Release64 --target lgc amdllpc ``` or ``` -cmake --build xgl/builds/Release64 --target check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-continuations check-continuations-units +cmake --build xgl/builds/Release64 --target check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-llvmraytracing check-llvmraytracing-units ``` LLPC also contains amber tests that need an actual GPU to run. See the [test directory](../../test/) for more information. @@ -48,7 +48,7 @@ Once you have followed the driver build instructions for installing source, star ``` cd llpc cmake -G Ninja -B build [-DPAL_CLIENT_INTERFACE_MAJOR_VERSION=] -cmake --build build --target check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-continuations check-continuations-units +cmake --build build --target check-lgc check-lgc-units check-amdllpc check-amdllpc-units check-llvmraytracing check-llvmraytracing-units ``` See above if this gives an error due to not finding an include file from glslang or SPIRV-Tools. diff --git a/llpc/lower/LowerAdvancedBlend.cpp b/llpc/lower/LowerAdvancedBlend.cpp new file mode 100644 index 0000000000..e06f675fe1 --- /dev/null +++ b/llpc/lower/LowerAdvancedBlend.cpp @@ -0,0 +1,144 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LowerAdvancedBlend.cpp + * @brief LLPC source file: contains implementation of class Llpc::LowerAdvancedBlend. 
+ *********************************************************************************************************************** + */ +#include "LowerAdvancedBlend.h" +#include "GfxRuntimeContext.h" +#include "SPIRVInternal.h" +#include "compilerutils/CompilerUtils.h" +#include "llpcContext.h" +#include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" +#include "vkgcDefs.h" +#include "lgc/Builder.h" + +#define DEBUG_TYPE "Lower-advanced-blend" + +using namespace lgc; +using namespace llvm; +using namespace Llpc; + +namespace Llpc { +static const char *AdvancedBlendInternal = "AmdAdvancedBlendInternal"; +static const char *AdvancedBlendModeName = "_mode"; +static const char *AdvancedBlendIsMsaaName = "_isMsaa"; + +// ===================================================================================================================== +LowerAdvancedBlend::LowerAdvancedBlend(unsigned binding) : m_binding(binding) { +} + +// ===================================================================================================================== +// Executes this SPIR-V lowering pass on the specified LLVM module. 
+// +// @param [in/out] module : LLVM module to be run on (empty on entry) +// @param [in/out] analysisManager : Analysis manager to use for this transformation +PreservedAnalyses LowerAdvancedBlend::run(Module &module, ModuleAnalysisManager &analysisManager) { + LLVM_DEBUG(dbgs() << "Run the pass Lower-advanced-blend\n"); + SpirvLower::init(&module); + + if (m_shaderStage == ShaderStageFragment) { + processFsOutputs(module); + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); +} + +// ===================================================================================================================== +// Apply blending function on outputs of fragment shader +// +// @param [in/out] module : LLVM module to be run on +void LowerAdvancedBlend::processFsOutputs(Module &module) { + // Get the outputs of FS + SmallVector outputs; + Value *modeUniform = nullptr; + Value *isMsaaUniform = nullptr; + for (auto &global : module.globals()) { + if (global.getType()->getAddressSpace() == SPIRAS_Output) + outputs.push_back(&global); + if (global.getType()->getAddressSpace() == SPIRAS_Uniform && global.getName().ends_with(AdvancedBlendModeName)) + modeUniform = &global; + if (global.getType()->getAddressSpace() == SPIRAS_Uniform && global.getName().ends_with(AdvancedBlendIsMsaaName)) + isMsaaUniform = &global; + } + // Prepare arguments of AmdAdvancedBlend(inColor, imageDescMsLow, imageDescMsHigh, imageDescLow, imageDescHigh, + // fmaskDescLow, fmaskDescHigh, mode, isMsaa) from shaderLibrary + m_builder->SetInsertPointPastAllocas(m_entryPoint); + + // Get the parameters and store them into the allocated parameter points + Type *descType = FixedVectorType::get(m_builder->getInt32Ty(), 8); + unsigned bindings[2] = {m_binding, m_binding + 1}; + Value *imageDescLow[2] = {}; + Value *imageDescHigh[2] = {}; + for (unsigned id = 0; id < 2; ++id) { + unsigned descSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource); 
+ Value *imageDescPtr = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorResource, + ResourceNodeType::DescriptorResource, descSet, bindings[id]); + Value *imageDesc = m_builder->CreateLoad(descType, imageDescPtr); + imageDescLow[id] = m_builder->CreateShuffleVector(imageDesc, ArrayRef{0, 1, 2, 3}); + imageDescHigh[id] = m_builder->CreateShuffleVector(imageDesc, ArrayRef{4, 5, 6, 7}); + } + + unsigned descSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorFmask); + Value *fmaskDescPtr = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorFmask, + ResourceNodeType::DescriptorFmask, descSet, m_binding); + Value *fmaskDesc = m_builder->CreateLoad(descType, fmaskDescPtr); + Value *fmaskDescLow = m_builder->CreateShuffleVector(fmaskDesc, ArrayRef{0, 1, 2, 3}); + Value *fmaskDescHigh = m_builder->CreateShuffleVector(fmaskDesc, ArrayRef{4, 5, 6, 7}); + + assert(modeUniform && isMsaaUniform); + modeUniform = m_builder->CreateLoad(m_builder->getInt32Ty(), modeUniform); + + isMsaaUniform = + m_builder->CreateTrunc(m_builder->CreateLoad(m_builder->getInt32Ty(), isMsaaUniform), m_builder->getInt1Ty()); + + // Link the gfxruntime library module + GfxRuntimeContext &gfxRuntimeContext = GfxRuntimeContext::get(*m_context); + auto *advancedBlendFunc = (*gfxRuntimeContext.theModule).getFunction(AdvancedBlendInternal); + + CompilerUtils::CrossModuleInliner inliner; + + // Call AmdAdvancedBlendInternal() for each output + for (auto [i, outCol] : llvm::enumerate(outputs)) { + for (auto user : outCol->users()) { + auto storeInst = cast(user); + assert(storeInst); + Value *srcVal = storeInst->getValueOperand(); + m_builder->SetInsertPoint(storeInst); + + Value *blendColor = inliner + .inlineCall(*m_builder, advancedBlendFunc, + {srcVal, imageDescLow[0], imageDescHigh[0], imageDescLow[1], imageDescHigh[1], + fmaskDescLow, fmaskDescHigh, modeUniform, isMsaaUniform}) + .returnValue; + + storeInst->setOperand(0, blendColor); + } + } +} + 
+} // namespace Llpc diff --git a/llpc/lower/LowerAdvancedBlend.h b/llpc/lower/LowerAdvancedBlend.h new file mode 100644 index 0000000000..3539368eeb --- /dev/null +++ b/llpc/lower/LowerAdvancedBlend.h @@ -0,0 +1,53 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LowerAdvancedBlend.h + * @brief LLPC header file: contains declaration of Llpc::LowerAdvancedBlend + *********************************************************************************************************************** + */ +#pragma once + +#include "llpcSpirvLower.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/PassManager.h" + +namespace Llpc { +// ===================================================================================================================== +// Represents the pass of SPIR-V lowering advanced blend shader +class LowerAdvancedBlend : public SpirvLower, public llvm::PassInfoMixin { + +public: + LowerAdvancedBlend(unsigned binding = 0); + llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); + static llvm::StringRef name() { return "Lower SPIR-V advanced blend shader"; } + +private: + typedef void (LowerAdvancedBlend::*LibraryFuncPtr)(llvm::Function *, unsigned); + void processFsOutputs(llvm::Module &module); + + unsigned m_binding; // The binding point for the multi-sample +}; +} // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerRayQueryPostInline.cpp b/llpc/lower/LowerPostInline.cpp similarity index 91% rename from llpc/lower/llpcSpirvLowerRayQueryPostInline.cpp rename to llpc/lower/LowerPostInline.cpp index 5288e9b3a1..64400f8193 100644 --- a/llpc/lower/llpcSpirvLowerRayQueryPostInline.cpp +++ b/llpc/lower/LowerPostInline.cpp @@ -24,11 +24,11 @@ **********************************************************************************************************************/ /** *********************************************************************************************************************** - * @file 
llpcSpirvLowerRayQueryPostInline.cpp - * @brief LLPC source file: contains implementation of class Llpc::SpirvLowerRayQueryPostInline. + * @file LowerPostInline.cpp + * @brief LLPC source file: contains implementation of class Llpc::LowerPostInline. *********************************************************************************************************************** */ -#include "llpcSpirvLowerRayQueryPostInline.h" +#include "LowerPostInline.h" #include "SPIRVInternal.h" #include "llpcContext.h" #include "llpcSpirvLowerUtil.h" @@ -36,7 +36,7 @@ #include "lgc/Pipeline.h" #include "llvm/IR/DerivedTypes.h" -#define DEBUG_TYPE "llpc-spirv-lower-ray-query-post-inline" +#define DEBUG_TYPE "lower-post-inline" using namespace llvm; using namespace Llpc; @@ -48,7 +48,7 @@ namespace Llpc { // // @param [in/out] module : LLVM module to be run on // @param [in/out] analysisManager : Analysis manager to use for this transformation -PreservedAnalyses SpirvLowerRayQueryPostInline::run(Module &module, ModuleAnalysisManager &analysisManager) { +PreservedAnalyses LowerPostInline::run(Module &module, ModuleAnalysisManager &analysisManager) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-ray-query-post-inline\n"); for (Function &func : module) { diff --git a/llpc/lower/llpcSpirvLowerRayQueryPostInline.h b/llpc/lower/LowerPostInline.h similarity index 85% rename from llpc/lower/llpcSpirvLowerRayQueryPostInline.h rename to llpc/lower/LowerPostInline.h index 4fc9fde303..b3937f2017 100644 --- a/llpc/lower/llpcSpirvLowerRayQueryPostInline.h +++ b/llpc/lower/LowerPostInline.h @@ -24,8 +24,8 @@ **********************************************************************************************************************/ /** *********************************************************************************************************************** - * @file llpcSpirvLowerRayQueryPostInline.h - * @brief LLPC header file: contains declaration of Llpc::SpirvLowerRayQueryPostInline + * @file 
LowerPostInline.h + * @brief LLPC header file: contains declaration of Llpc::LowerPostInline *********************************************************************************************************************** */ #pragma once @@ -36,11 +36,11 @@ namespace Llpc { // Represents the pass of SPIR-V lowering ray query post inline. -class SpirvLowerRayQueryPostInline : public SpirvLower, public llvm::PassInfoMixin { +class LowerPostInline : public SpirvLower, public llvm::PassInfoMixin { public: llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - static llvm::StringRef name() { return "Lower SPIR-V RayQueryPostInline operations"; } + static llvm::StringRef name() { return "Lower SPIR-V PostInline operations"; } }; } // namespace Llpc diff --git a/llpc/lower/PassRegistry.inc b/llpc/lower/PassRegistry.inc index d38e26d0c4..940cd516e7 100644 --- a/llpc/lower/PassRegistry.inc +++ b/llpc/lower/PassRegistry.inc @@ -54,7 +54,7 @@ LLPC_MODULE_PASS("llpc-spirv-lower-math-precision", SpirvLowerMathPrecision) LLPC_MODULE_PASS("llpc-spirv-lower-math-float-op", SpirvLowerMathFloatOp) LLPC_MODULE_PASS("llpc-spirv-lower-memory-op", SpirvLowerMemoryOp) LLPC_MODULE_PASS("llpc-spirv-lower-ray-tracing", SpirvLowerRayTracing) -LLPC_MODULE_PASS("llpc-spirv-lower-ray-query-post-inline", SpirvLowerRayQueryPostInline) +LLPC_MODULE_PASS("lower-post-inline", LowerPostInline) #undef LLPC_PASS #undef LLPC_MODULE_PASS diff --git a/llpc/lower/PrepareContinuations.cpp b/llpc/lower/PrepareContinuations.cpp index b03e24c4dd..696c12dae8 100644 --- a/llpc/lower/PrepareContinuations.cpp +++ b/llpc/lower/PrepareContinuations.cpp @@ -31,9 +31,11 @@ #include "PrepareContinuations.h" #include "compilerutils/CompilerUtils.h" #include "llpcContext.h" +#include "llpcRayTracingContext.h" #include "llvmraytracing/ContinuationsUtil.h" #include "llvmraytracing/GpurtContext.h" #include "lgc/Builder.h" +#include "llvm/IR/Module.h" #define DEBUG_TYPE 
"prepare-continuations" using namespace lgc; @@ -75,6 +77,13 @@ PreservedAnalyses PrepareContinuations::run(Module &module, ModuleAnalysisManage inliner.inlineCall(builder, contKernel, {}); setLgcRtShaderStage(entryFunc, RayTracingShaderStage::KernelEntry); lgc::Pipeline::markShaderEntryPoint(entryFunc, lgc::ShaderStage::Compute); + } else { + m_entryPoint->setName(module.getName()); + auto rtContext = static_cast(m_context->getPipelineContext()); + + ContHelper::setMaxPayloadRegisterCount(module, cps::CpsPayloadMaxNumVgprs); + + setShaderHitAttributeSize(m_entryPoint, rtContext->getAttributeDataSizeInBytes()); } return PreservedAnalyses::none(); diff --git a/llpc/lower/ProcessGfxRuntimeLibrary.cpp b/llpc/lower/ProcessGfxRuntimeLibrary.cpp new file mode 100644 index 0000000000..378a8e749e --- /dev/null +++ b/llpc/lower/ProcessGfxRuntimeLibrary.cpp @@ -0,0 +1,180 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ProcessGfxRuntimeLibrary.cpp + * @brief LLPC source file: contains implementation of class Llpc::ProcessGfxRuntimeLibrary. + *********************************************************************************************************************** + */ +#include "ProcessGfxRuntimeLibrary.h" +#include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" +#include "llpcSpirvLowerUtil.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "lgc/Builder.h" +#include "llvm/ADT/SmallBitVector.h" + +#define DEBUG_TYPE "process-gfxruntime-library" +using namespace lgc; +using namespace llvm; + +namespace Llpc { +ProcessGfxRuntimeLibrary::ProcessGfxRuntimeLibrary() { +} + +// ===================================================================================================================== +// Executes this SPIR-V lowering pass on the specified LLVM module. 
+//
+// @param [in/out] module : LLVM module to be run on
+// @param [in/out] analysisManager : Analysis manager to use for this transformation
+// @returns : Always PreservedAnalyses::none()
+PreservedAnalyses ProcessGfxRuntimeLibrary::run(Module &module, ModuleAnalysisManager &analysisManager) {
+  LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-gfxruntime-library\n");
+  SpirvLower::init(&module);
+  // Advance the iterator before processing: processLibraryFunction may replace the function it is given.
+  for (auto funcIt = module.begin(), funcEnd = module.end(); funcIt != funcEnd;) {
+    Function *func = &*funcIt++;
+    processLibraryFunction(func);
+  }
+
+  return PreservedAnalyses::none();
+}
+
+// =====================================================================================================================
+// Initialize library function pointer table
+ProcessGfxRuntimeLibrary::LibraryFunctionTable::LibraryFunctionTable() {
+  m_libFuncPtrs["AmdAdvancedBlendTexelLoad"] = &ProcessGfxRuntimeLibrary::createTexelLoad;
+  m_libFuncPtrs["AmdAdvancedBlendTexelLoadFmask"] = &ProcessGfxRuntimeLibrary::createTexelLoadFmask;
+  m_libFuncPtrs["AmdAdvancedBlendCoherentTexelLoad"] = &ProcessGfxRuntimeLibrary::createCoherentTexelLoad;
+  m_libFuncPtrs["AmdAdvancedBlendCoherentTexelStore"] = &ProcessGfxRuntimeLibrary::createCoherentTexelStore;
+}
+
+// =====================================================================================================================
+// Process one function of the gfxruntime library:
+//  - functions whose name starts with "AmdAdvancedBlendInternal" get external linkage and have their pointer
+//    arguments promoted (func is updated to the result of promotePointerArguments);
+//  - functions found in the gfxruntime or the common intrinsic table have their body cleared and regenerated by the
+//    matching create* routine;
+//  - all other functions are left unchanged.
+//
+// @param [in/out] func : The function to process
+void ProcessGfxRuntimeLibrary::processLibraryFunction(Function *&func) {
+  const StringRef funcName = func->getName();
+
+  static const char *const AdvancedBlendInternalName = "AmdAdvancedBlendInternal";
+  if (funcName.starts_with(AdvancedBlendInternalName)) {
+    func->setLinkage(GlobalValue::ExternalLinkage);
+    // Mark every pointer-typed argument for promotion.
+    SmallBitVector promotionMask(func->arg_size());
+    for (unsigned argId = 0; argId < func->arg_size(); ++argId) {
+      if (ContArgTy::get(func, func->getArg(argId)).isPointerTy())
+        promotionMask.set(argId);
+    }
+    func = promotePointerArguments(func, promotionMask);
+    return;
+  }
+
+  // Bind the table by reference; taking it by value would copy the whole DenseMap for every function in the module.
+  const auto &gfxruntimeFuncTable = LibraryFunctionTable::get().m_libFuncPtrs;
+  auto gfxruntimeFuncIt = gfxruntimeFuncTable.find(funcName);
+  if (gfxruntimeFuncIt != gfxruntimeFuncTable.end()) {
+    auto funcPtr = gfxruntimeFuncIt->second;
+    m_builder->SetInsertPoint(clearBlock(func));
+    (this->*funcPtr)(func);
+    return;
+  }
+
+  const auto &commonFuncTable = InternalLibraryIntrinsicUtil::LibraryFunctionTable::get().m_libFuncPtrs;
+  auto commonFuncIt = commonFuncTable.find(funcName);
+  if (commonFuncIt != commonFuncTable.end()) {
+    auto funcPtr = commonFuncIt->second;
+    m_builder->SetInsertPoint(clearBlock(func));
+    (*funcPtr)(func, m_builder);
+  }
+}
+
+// =====================================================================================================================
+// Create texel load
+//
+// @param func : The library function whose body is generated; its arguments are pointers to the values listed below
+void ProcessGfxRuntimeLibrary::createTexelLoad(Function *func) {
+  // Arguments: imageDescLow, imageDescHigh, icoord, lod
+  constexpr unsigned argCount = 4;
+  Type *int4Ty = FixedVectorType::get(m_builder->getInt32Ty(), 4);
+  Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2);
+  Type *argTypes[] = {int4Ty, int4Ty, int2Ty, m_builder->getInt32Ty()};
+  std::array<Value *, argCount> loadArgs;
+  for (unsigned i = 0; i < argCount; ++i)
+    loadArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i));
+  unsigned imageFlag = Builder::ImageFlagInvariant | Builder::ImageFlagNotAliased;
+  // Concatenate the low and high halves back into one <8 x i32> image descriptor.
+  auto imageDesc = m_builder->CreateShuffleVector(loadArgs[0], loadArgs[1], ArrayRef<int>{0, 1, 2, 3, 4, 5, 6, 7});
+  auto imageLoad =
+      m_builder->CreateImageLoad(func->getReturnType(), Builder::Dim2D, imageFlag, imageDesc, loadArgs[2], loadArgs[3]);
+  m_builder->CreateRet(imageLoad);
+}
+
+// =====================================================================================================================
+// Create texel load with fmask
+//
+// @param func : The library function whose body is generated; its arguments are pointers to the values listed below
+void ProcessGfxRuntimeLibrary::createTexelLoadFmask(Function *func) {
+  // Arguments: imageDescLow, imageDescHigh, fmaskDescLow, fmaskDescHigh, icoord, lod
+  constexpr unsigned argCount = 6;
+  Type *int4Ty = FixedVectorType::get(m_builder->getInt32Ty(), 4);
+  Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2);
+  Type *argTypes[] = {int4Ty, int4Ty, int4Ty, int4Ty, int2Ty, m_builder->getInt32Ty()};
+  std::array<Value *, argCount> loadArgs;
+  for (unsigned i = 0; i < argCount; ++i)
+    loadArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i));
+  unsigned imageFlag = Builder::ImageFlagInvariant | Builder::ImageFlagNotAliased;
+  // Concatenate the low and high halves back into <8 x i32> image and fmask descriptors.
+  auto imageDesc = m_builder->CreateShuffleVector(loadArgs[0], loadArgs[1], ArrayRef<int>{0, 1, 2, 3, 4, 5, 6, 7});
+  auto fmaskDesc = m_builder->CreateShuffleVector(loadArgs[2], loadArgs[3], ArrayRef<int>{0, 1, 2, 3, 4, 5, 6, 7});
+  auto imageLoad = m_builder->CreateImageLoadWithFmask(func->getReturnType(), Builder::Dim2DMsaa, imageFlag, imageDesc,
+                                                       fmaskDesc, loadArgs[4], loadArgs[5]);
+  m_builder->CreateRet(imageLoad);
+}
+
+// =====================================================================================================================
+// Create coherent texel load. Currently a placeholder that returns the incoming color unchanged.
+//
+// @param func : The library function whose body is generated; its arguments are pointers to the values listed below
+void ProcessGfxRuntimeLibrary::createCoherentTexelLoad(Function *func) {
+  // Arguments: inColor, icoord, sampleId
+  constexpr unsigned argCount = 3;
+  Type *float4Ty = FixedVectorType::get(m_builder->getFloatTy(), 4);
+  Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2);
+  Type *argTypes[] = {float4Ty, int2Ty, m_builder->getInt32Ty()};
+  std::array<Value *, argCount> loadArgs;
+  for (unsigned i = 0; i < argCount; ++i)
+    loadArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i));
+  // TODO: Implement load texel based on ROV
+  m_builder->CreateRet(loadArgs[0]);
+}
+
+// =====================================================================================================================
+// Create coherent texel store. Currently a placeholder: the arguments are loaded but nothing is stored.
+//
+// @param func : The library function whose body is generated; its arguments are pointers to the values listed below
+void ProcessGfxRuntimeLibrary::createCoherentTexelStore(Function *func) {
+  // Arguments: inColor, icoord, sampleId
+  constexpr unsigned argCount = 3;
+  Type *float4Ty = FixedVectorType::get(m_builder->getFloatTy(), 4);
+  Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2);
+  Type *argTypes[] = {float4Ty, int2Ty, m_builder->getInt32Ty()};
+  std::array<Value *, argCount> storeArgs;
+  for (unsigned i = 0; i < argCount; ++i)
+    storeArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i));
+  // TODO: Implement store texel based on ROV
+  m_builder->CreateRetVoid();
+}
+
+} // namespace Llpc
diff --git a/llpc/lower/ProcessGfxRuntimeLibrary.h b/llpc/lower/ProcessGfxRuntimeLibrary.h
new file mode 100644
index 0000000000..19a48861c6
--- /dev/null
+++ b/llpc/lower/ProcessGfxRuntimeLibrary.h
@@ -0,0 +1,58 @@
+/*
+ ***********************************************************************************************************************
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ProcessGfxRuntimeLibrary.h + * @brief LLPC header file: contains declaration of Llpc::ProcessGfxRuntimeLibrary + *********************************************************************************************************************** + */ +#pragma once + +#include "llpcSpirvLower.h" +#include "llvm/IR/PassManager.h" + +namespace Llpc { +class ProcessGfxRuntimeLibrary : public SpirvLower, public llvm::PassInfoMixin { +public: + ProcessGfxRuntimeLibrary(); + llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); + +private: + typedef void (ProcessGfxRuntimeLibrary::*LibraryFuncPtr)(llvm::Function *); + struct LibraryFunctionTable { + llvm::DenseMap m_libFuncPtrs; + LibraryFunctionTable(); + static const LibraryFunctionTable &get() { + static LibraryFunctionTable instance; + return instance; + } + }; + void processLibraryFunction(llvm::Function *&func); + void createTexelLoad(llvm::Function *func); + void createTexelLoadFmask(llvm::Function *func); + void createCoherentTexelLoad(llvm::Function *func); + void createCoherentTexelStore(llvm::Function *func); +}; +} // namespace Llpc diff --git a/llpc/lower/llpcSpirvLower.cpp b/llpc/lower/llpcSpirvLower.cpp index 0c7501e102..d6738dc96f 100644 --- a/llpc/lower/llpcSpirvLower.cpp +++ b/llpc/lower/llpcSpirvLower.cpp @@ -30,6 +30,7 @@ */ #include "llpcSpirvLower.h" #include "LowerGLCompatibility.h" +#include "LowerPostInline.h" #include "llpcContext.h" #include "llpcDebug.h" #include "llpcSpirvLowerAccessChain.h" @@ -40,7 +41,6 @@ #include "llpcSpirvLowerInstMetaRemove.h" #include "llpcSpirvLowerMath.h" #include "llpcSpirvLowerMemoryOp.h" -#include "llpcSpirvLowerRayQueryPostInline.h" #include "llpcSpirvLowerRayTracing.h" 
#include "llpcSpirvLowerTerminator.h" #include "llpcSpirvLowerTranslator.h" @@ -52,6 +52,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ReplaceConstant.h" #include "llvm/IR/Verifier.h" #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 442438 // Old version of the code @@ -88,82 +89,6 @@ using namespace lgc; using namespace llvm; namespace Llpc { -// ===================================================================================================================== -// Replace a constant with instructions using a builder. -// -// @param context : The context -// @param [in/out] constVal : The constant to replace with instructions. -void SpirvLower::replaceConstWithInsts(Context *context, Constant *const constVal) - -{ - SmallSet otherConsts; - Builder *builder = context->getBuilder(); - for (User *const user : constVal->users()) { - if (Constant *const otherConst = dyn_cast(user)) - otherConsts.insert(otherConst); - } - - for (Constant *const otherConst : otherConsts) - replaceConstWithInsts(context, otherConst); - - otherConsts.clear(); - - SmallVector users; - - for (User *const user : constVal->users()) - users.push_back(user); - - for (Value *const user : users) { - Instruction *const inst = cast(user); - - // If the instruction is a phi node, we have to insert the new instructions in the correct predecessor. 
- if (PHINode *const phiNode = dyn_cast(inst)) { - const unsigned incomingValueCount = phiNode->getNumIncomingValues(); - for (unsigned i = 0; i < incomingValueCount; i++) { - if (phiNode->getIncomingValue(i) == constVal) { - builder->SetInsertPoint(phiNode->getIncomingBlock(i)->getTerminator()); - break; - } - } - } else - builder->SetInsertPoint(inst); - - if (ConstantExpr *const constExpr = dyn_cast(constVal)) { - Instruction *const insertPos = builder->Insert(constExpr->getAsInstruction()); - inst->replaceUsesOfWith(constExpr, insertPos); - } else if (ConstantVector *const constVector = dyn_cast(constVal)) { - Value *resultValue = PoisonValue::get(constVector->getType()); - for (unsigned i = 0; i < constVector->getNumOperands(); i++) { - // Have to not use the builder here because it will constant fold and we are trying to undo that now! - Instruction *const insertPos = - InsertElementInst::Create(resultValue, constVector->getOperand(i), builder->getInt32(i)); - resultValue = builder->Insert(insertPos); - } - inst->replaceUsesOfWith(constVector, resultValue); - } else - llvm_unreachable("Should never be called!"); - } - - constVal->removeDeadConstantUsers(); - constVal->destroyConstant(); -} - -// ===================================================================================================================== -// Removes those constant expressions that reference global variables. 
-// -// @param context : The context -// @param global : The global variable -void SpirvLower::removeConstantExpr(Context *context, GlobalVariable *global) { - SmallVector constantUsers; - - for (User *const user : global->users()) { - if (Constant *const constant = dyn_cast(user)) - constantUsers.push_back(constant); - } - - for (Constant *const constVal : constantUsers) - replaceConstWithInsts(context, constVal); -} // ===================================================================================================================== // Add per-shader lowering passes to pass manager @@ -193,8 +118,8 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager // Lower SPIR-V access chain passMgr.addPass(SpirvLowerAccessChain()); - if (lowerFlag.isRayQuery) - passMgr.addPass(SpirvLowerRayQueryPostInline()); + if (lowerFlag.isRayQuery || lowerFlag.usesAdvancedBlend) + passMgr.addPass(LowerPostInline()); // Lower SPIR-V terminators passMgr.addPass(SpirvLowerTerminator()); @@ -310,16 +235,8 @@ void SpirvLower::registerLoweringPasses(lgc::PassManager &passMgr) { // @param original : Replaced global variable // @param replacement : Replacing global variable void SpirvLower::replaceGlobal(Context *context, GlobalVariable *original, GlobalVariable *replacement) { - removeConstantExpr(context, original); - Builder *builder = context->getBuilder(); - SmallVector users(original->users()); - for (User *user : users) { - Instruction *inst = cast(user); - builder->SetInsertPoint(inst); - Value *replacedValue = builder->CreateBitCast(replacement, original->getType()); - user->replaceUsesOfWith(original, replacedValue); - } - original->dropAllReferences(); + convertUsersOfConstantsToInstructions(original); + original->replaceAllUsesWith(replacement); original->eraseFromParent(); } diff --git a/llpc/lower/llpcSpirvLower.h b/llpc/lower/llpcSpirvLower.h index 947124a04e..f1a788c761 100644 --- a/llpc/lower/llpcSpirvLower.h +++ b/llpc/lower/llpcSpirvLower.h @@ 
-57,7 +57,8 @@ union LowerFlag { unsigned isRayTracing : 1; // Whether we are lowering a ray tracing pipeline shader unsigned isRayQuery : 1; // Whether we are lowering a ray query library unsigned isInternalRtShader : 1; // Whether we are lowering an internal ray tracing shader - unsigned reserved : 29; + unsigned usesAdvancedBlend : 1; // Whether we are lowering an advanced blend shader + unsigned reserved : 28; }; unsigned u32All; }; @@ -76,8 +77,6 @@ class SpirvLower { // Register all the lowering passes into the given pass manager static void registerLoweringPasses(lgc::PassManager &passMgr); - static void removeConstantExpr(Context *context, llvm::GlobalVariable *global); - static void replaceConstWithInsts(Context *context, llvm::Constant *const constVal); static void replaceGlobal(Context *context, llvm::GlobalVariable *original, llvm::GlobalVariable *replacement); protected: diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index b6e4ee24f3..ef982d0525 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -30,6 +30,7 @@ */ #include "llpcSpirvLowerGlobal.h" #include "SPIRVInternal.h" +#include "continuations/ContinuationsUtil.h" #include "llpcContext.h" #include "llpcDebug.h" #include "llpcGraphicsContext.h" @@ -56,12 +57,6 @@ using namespace SPIRV; using namespace Llpc; using namespace lgc::rt; -namespace RtName { -static const char *HitAttribute = "HitAttribute"; -static const char *IncomingRayPayLoad = "IncomingRayPayloadKHR"; -static const char *IncomingCallableData = "IncomingCallableDataKHR"; -} // namespace RtName - namespace Llpc { // The code here relies on the SPIR-V built-in kind being the same as the Builder built-in kind. 
@@ -216,7 +211,7 @@ PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &a if (addrSpace == SPIRAS_Private || addrSpace == SPIRAS_Input || addrSpace == SPIRAS_Output) { // Remove constant indexing expression and remove any proxy variables that are needed. (But the proxies aren't // used yet for inputs/outputs.) - removeConstantExpr(m_context, &global); + convertUsersOfConstantsToInstructions(&global); if (addrSpace == SPIRAS_Private) mapGlobalVariableToProxy(&global); @@ -600,38 +595,27 @@ void SpirvLowerGlobal::mapGlobalVariableToProxy(GlobalVariable *globalVar) { Type *globalVarTy = globalVar->getValueType(); Value *proxy = nullptr; - // Handle special globals, regular allocas will be removed by SROA pass. - if (globalVar->getName().starts_with(RtName::HitAttribute)) { - proxy = m_entryPoint->getArg(1); - globalVar->replaceAllUsesWith(proxy); - } else if (globalVar->getName().starts_with(RtName::IncomingRayPayLoad)) { - proxy = m_entryPoint->getArg(0); - globalVar->replaceAllUsesWith(proxy); - } else if (globalVar->getName().starts_with(RtName::IncomingCallableData)) { - proxy = m_entryPoint->getArg(0); - globalVar->replaceAllUsesWith(proxy); - } else { - // Collect used functions - SmallSet funcs; - for (User *user : globalVar->users()) { - auto inst = cast(user); - funcs.insert(inst->getFunction()); - } - for (Function *func : funcs) { - m_builder->SetInsertPointPastAllocas(func); - proxy = m_builder->CreateAlloca(globalVarTy, dataLayout.getAllocaAddrSpace(), nullptr, - Twine(LlpcName::GlobalProxyPrefix) + globalVar->getName()); - if (globalVar->hasInitializer()) { - auto initializer = globalVar->getInitializer(); - m_builder->CreateStore(initializer, proxy); - } - globalVar->mutateType(proxy->getType()); - globalVar->replaceUsesWithIf(proxy, [func](Use &U) { - Instruction *userInst = cast(U.getUser()); - return userInst->getFunction() == func; - }); + // Collect used functions + SmallSet funcs; + for (User *user : globalVar->users()) 
{ + auto inst = cast(user); + funcs.insert(inst->getFunction()); + } + for (Function *func : funcs) { + m_builder->SetInsertPointPastAllocas(func); + proxy = m_builder->CreateAlloca(globalVarTy, dataLayout.getAllocaAddrSpace(), nullptr, + Twine(LlpcName::GlobalProxyPrefix) + globalVar->getName()); + + if (globalVar->hasInitializer()) { + auto initializer = globalVar->getInitializer(); + m_builder->CreateStore(initializer, proxy); } + globalVar->mutateType(proxy->getType()); + globalVar->replaceUsesWithIf(proxy, [func](Use &U) { + Instruction *userInst = cast(U.getUser()); + return userInst->getFunction() == func; + }); } m_globalsToErase.push_back(globalVar); @@ -1794,13 +1778,19 @@ void SpirvLowerGlobal::lowerBufferBlock() { } bool isAccelerationStructure = false; + bool isAliased = false; MDNode *blockMetaNode = global.getMetadata(gSPIRVMD::Block); if (blockMetaNode) { ShaderBlockMetadata blockMeta = {}; auto blockMetaNodeVal = mdconst::dyn_extract(blockMetaNode->getOperand(0)); - if (auto meta = dyn_cast(blockMetaNodeVal)) + if (auto meta = dyn_cast(blockMetaNodeVal)) { blockMeta.U64All = meta->getZExtValue(); + } else if (auto metaStruct = dyn_cast(blockMetaNodeVal)) { + const Constant *metaStructVal = metaStruct->getOperand(0); + blockMeta.U64All = cast(metaStructVal)->getZExtValue(); + } isAccelerationStructure = blockMeta.IsAccelerationStructure; + isAliased = blockMeta.Aliased; } if (global.getAddressSpace() == SPIRAS_Constant && !isAccelerationStructure) @@ -1811,7 +1801,6 @@ void SpirvLowerGlobal::lowerBufferBlock() { const unsigned descSet = mdconst::dyn_extract(resMetaNode->getOperand(0))->getZExtValue(); const unsigned binding = mdconst::dyn_extract(resMetaNode->getOperand(1))->getZExtValue(); - SmallVector constantUsers; // AtomicCounter is emulated following same impl of SSBO, only qualifier 'offset' will be used in its // MD now. Using a new MD kind to detect it. AtomicCounter's type should be uint, not a structure. 
@@ -1823,13 +1812,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { cast(mdconst::dyn_extract(atomicCounterMD->getOperand(0)))->getZExtValue(); } - for (User *const user : global.users()) { - if (Constant *const constVal = dyn_cast(user)) - constantUsers.push_back(constVal); - } - - for (Constant *const constVal : constantUsers) - replaceConstWithInsts(m_context, constVal); + convertUsersOfConstantsToInstructions(&global); // Record of all the functions that our global is used within. SmallSet funcsUsedIn; @@ -1841,7 +1824,39 @@ void SpirvLowerGlobal::lowerBufferBlock() { // Collect the instructions to be replaced per-global SmallVector instructionsToReplace; + bool isConstant = false; + bool isReadOnly = true; + for (Function *const func : funcsUsedIn) { + SmallVector worklist; + for (User *const user : global.users()) + worklist.push_back(user); + + while (!worklist.empty()) { + Value *current = worklist.pop_back_val(); + if (auto inst = dyn_cast(current)) { + if (inst->getFunction() != func) + continue; + + if (auto *GEP = dyn_cast(inst)) { + for (auto *gepUser : GEP->users()) + worklist.push_back(gepUser); + continue; + } + + if (auto load = dyn_cast(inst)) + if (!load->isAtomic()) + continue; + + // Anything that is not a load prevents the buffer being treated as readonly. + isReadOnly = false; + break; + } + } + + if (global.isConstant() || (isReadOnly && !isAliased)) + isConstant = true; + // Check if our block is an array of blocks. if (!atomicCounterMD && global.getValueType()->isArrayTy()) { Type *const elementType = global.getValueType()->getArrayElementType(); @@ -1890,7 +1905,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { : m_builder->create(descSet, binding, m_builder->getInt32(0), bufferFlags); // If the global variable is a constant, the data it points to is invariant. 
- if (global.isConstant()) + if (isConstant) m_builder->CreateInvariantStart(bufferDesc); replaceInstsInfo.otherInst->replaceUsesOfWith(&global, bufferDesc); @@ -2008,7 +2023,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { m_builder->create(descSets[idx], bindings[idx], blockIndex, bufferFlags); } // If the global variable is a constant, the data it points to is invariant. - if (global.isConstant()) + if (isConstant) m_builder->CreateInvariantStart(bufferDescs[idx]); } @@ -2056,12 +2071,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { ? m_builder->CreateGetDescPtr(descTy, descTy, descSet, binding) : m_builder->create(descSet, binding, m_builder->getInt32(0), bufferFlags); - // If the global variable is a constant, the data it points to is invariant. - if (global.isConstant()) - m_builder->CreateInvariantStart(bufferDesc); - SmallVector usesToReplace; - for (User *const user : global.users()) { // Skip over non-instructions that we've already made useless. if (!isa(user)) @@ -2076,6 +2086,10 @@ void SpirvLowerGlobal::lowerBufferBlock() { usesToReplace.push_back(inst); } + // If the global variable is a constant, the data it points to is invariant. + if (isConstant) + m_builder->CreateInvariantStart(bufferDesc); + Value *newLoadPtr = bufferDesc; if (atomicCounterMD) { SmallVector indices; @@ -2189,15 +2203,7 @@ void SpirvLowerGlobal::lowerPushConsts() { // There should only be a single push constant variable! assert(globalsToRemove.empty()); - SmallVector constantUsers; - - for (User *const user : global.users()) { - if (Constant *const constVal = dyn_cast(user)) - constantUsers.push_back(constVal); - } - - for (Constant *const constVal : constantUsers) - replaceConstWithInsts(m_context, constVal); + convertUsersOfConstantsToInstructions(&global); // Record of all the functions that our global is used within. 
SmallSet funcsUsedIn; @@ -2255,15 +2261,7 @@ void SpirvLowerGlobal::lowerUniformConstants() { if (global.getAddressSpace() != SPIRAS_Uniform || !global.hasMetadata(gSPIRVMD::UniformConstant)) continue; - SmallVector constantUsers; - - for (User *const user : global.users()) { - if (Constant *const constVal = dyn_cast(user)) - constantUsers.push_back(constVal); - } - - for (Constant *const constVal : constantUsers) - replaceConstWithInsts(m_context, constVal); + convertUsersOfConstantsToInstructions(&global); // A map from the function to the instructions inside it which access the global variable. SmallMapVector, 8> globalUsers; @@ -2469,7 +2467,7 @@ void SpirvLowerGlobal::lowerShaderRecordBuffer() { if (!global.getName().starts_with(ShaderRecordBuffer)) continue; - removeConstantExpr(m_context, &global); + convertUsersOfConstantsToInstructions(&global); m_builder->SetInsertPointPastAllocas(m_entryPoint); auto shaderRecordBufferPtr = m_builder->create(m_builder->create()); @@ -2560,11 +2558,14 @@ void SpirvLowerGlobal::changeRtFunctionSignature() { Type *pointerTy = PointerType::get(*m_context, SPIRAS_Private); switch (m_shaderStage) { case ShaderStageRayTracingIntersect: + // We don't have hit attribute in argument for IS in continuations mode. 
+ if (rayTracingContext->isContinuationsMode()) + break; + LLVM_FALLTHROUGH; // Fall through: Legacy RT still requires hit attribute in argument case ShaderStageRayTracingAnyHit: case ShaderStageRayTracingClosestHit: // Hit attribute argTys.push_back(pointerTy); - setShaderHitAttributeSize(m_entryPoint, rayTracingContext->getAttributeDataSizeInBytes()); LLVM_FALLTHROUGH; // Fall through: Handle payload case ShaderStageRayTracingMiss: // Payload @@ -2590,6 +2591,67 @@ void SpirvLowerGlobal::changeRtFunctionSignature() { assert(m_entryPoint->use_empty()); m_entryPoint->eraseFromParent(); m_entryPoint = newFunc; + + GlobalVariable *hitAttributeVar = nullptr; + GlobalVariable *incomingPayloadVar = nullptr; + GlobalVariable *incomingCallableDataVar = nullptr; + + // NOTE: There could be multiple definitions of these variables in a SPIR-V file, but there could only have one of + // each in used in current entry point. + for (auto &global : m_module->globals()) { + if (global.getNumUses() == 0) + continue; + if (global.getName().starts_with(SPIRVStorageClassNameMap::map(StorageClassHitAttributeKHR))) { + assert(hitAttributeVar == nullptr); + hitAttributeVar = &global; + } else if (global.getName().starts_with(SPIRVStorageClassNameMap::map(StorageClassIncomingRayPayloadKHR))) { + assert(incomingPayloadVar == nullptr); + incomingPayloadVar = &global; + } else if (global.getName().starts_with(SPIRVStorageClassNameMap::map(StorageClassIncomingCallableDataKHR))) { + assert(incomingCallableDataVar == nullptr); + incomingCallableDataVar = &global; + } + } + + if (hitAttributeVar && m_entryPoint->arg_size() == 2) { + assert(!rayTracingContext->isContinuationsMode() || m_shaderStage != ShaderStageRayTracingIntersect); + convertUsersOfConstantsToInstructions(hitAttributeVar); + hitAttributeVar->replaceAllUsesWith(m_entryPoint->getArg(1)); + m_globalsToErase.push_back(hitAttributeVar); + } + + if (incomingPayloadVar) { + convertUsersOfConstantsToInstructions(incomingPayloadVar); + 
incomingPayloadVar->replaceAllUsesWith(m_entryPoint->getArg(0)); + m_globalsToErase.push_back(incomingPayloadVar); + } else if (incomingCallableDataVar) { + convertUsersOfConstantsToInstructions(incomingCallableDataVar); + incomingCallableDataVar->replaceAllUsesWith(m_entryPoint->getArg(0)); + m_globalsToErase.push_back(incomingCallableDataVar); + } + + if (rayTracingContext->isContinuationsMode()) { + SmallVector contArgTys; + + auto var = m_shaderStage == ShaderStageRayTracingCallable ? incomingCallableDataVar : incomingPayloadVar; + auto payloadTy = var ? var->getValueType() : StructType::get(*m_context); + if (!isa(payloadTy)) + payloadTy = StructType::get(*m_context, {payloadTy}, false); + contArgTys.push_back(ContArgTy(pointerTy, payloadTy)); + if ((m_shaderStage == ShaderStageRayTracingAnyHit) || (m_shaderStage == ShaderStageRayTracingClosestHit)) { + auto type = ArrayType::get(m_builder->getInt32Ty(), rayTracingContext->getAttributeDataSize()); + contArgTys.push_back(ContArgTy(pointerTy, type)); + } + + ContFuncTy contFuncTy(m_builder->getVoidTy(), contArgTys); + contFuncTy.writeMetadata(newFunc); + } + + for (auto globalVar : m_globalsToErase) { + globalVar->dropAllReferences(); + globalVar->eraseFromParent(); + } + m_globalsToErase.clear(); } } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp b/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp index 32f12fda05..c8c644855c 100644 --- a/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp +++ b/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp @@ -348,6 +348,24 @@ static void createStoreDwordAtAddrUncached(Function *func, Builder *builder) { createLoadStore(func, builder, false, true); } +// ===================================================================================================================== +// Create coordinates of the current fragment +// +// @param func : The function to process +// @param builder : The IR builder +static void 
createFragCoord(Function *func, Builder *builder) { + builder->CreateRet(builder->CreateReadBuiltInInput(lgc::BuiltInFragCoord, {}, nullptr, nullptr, "")); +} + +// ===================================================================================================================== +// Create sample ID of the current fragment +// +// @param func : The function to process +// @param builder : The IR builder +static void createSampleId(Function *func, Builder *builder) { + builder->CreateRet(builder->CreateReadBuiltInInput(lgc::BuiltInSampleId, {}, nullptr, nullptr, "")); +} + // ===================================================================================================================== // Initialize library function pointer table InternalLibraryIntrinsicUtil::LibraryFunctionTable::LibraryFunctionTable() { @@ -375,6 +393,8 @@ InternalLibraryIntrinsicUtil::LibraryFunctionTable::LibraryFunctionTable() { m_libFuncPtrs["AmdExtLoadDwordAtAddrUncached"] = &createLoadDwordAtAddrUncached; m_libFuncPtrs["AmdExtStoreDwordAtAddr"] = &createStoreDwordAtAddr; m_libFuncPtrs["AmdExtStoreDwordAtAddrUncached"] = &createStoreDwordAtAddrUncached; + m_libFuncPtrs["AmdExtFragCoord"] = &createFragCoord; + m_libFuncPtrs["AmdExtSampleId"] = &createSampleId; } } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerMath.cpp b/llpc/lower/llpcSpirvLowerMath.cpp index a9eb91a5c0..95876db8be 100644 --- a/llpc/lower/llpcSpirvLowerMath.cpp +++ b/llpc/lower/llpcSpirvLowerMath.cpp @@ -536,34 +536,111 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { // Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: // ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) if (opCode == Instruction::FMul) { - Value *src1CmpValue = nullptr; - Value *src1FalseValue = nullptr; - Value *src2CmpValue = nullptr; - Value *src2FalseValue = nullptr; - FCmpInst::Predicate pred = FCmpInst::FCMP_OEQ; - // Detect whether A = (b==0.0 ? 
0.0 : a) and parse out b and a - bool src1Match = - match(src1, m_Select(m_FCmp(pred, m_Value(src1CmpValue), m_AnyZeroFP()), m_Zero(), m_Value(src1FalseValue))); - // Detect whether B = (a'==0.0 ? 0.0 : b') and output a' and b' - bool src2Match = - match(src2, m_Select(m_FCmp(pred, m_Value(src2CmpValue), m_AnyZeroFP()), m_Zero(), m_Value(src2FalseValue))); - // If b == b' && a == a' then use fmul_legacy(a,b) instead of fmul(A,B) - if (src1Match && src2Match) { - if ((src1CmpValue == src2FalseValue) && (src2CmpValue == src1FalseValue)) { - IRBuilder<> builder(*m_context); - builder.SetInsertPoint(&binaryOp); - builder.setFastMathFlags(binaryOp.getFastMathFlags()); - Value *fmulzResult = - builder.CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {src1FalseValue, src2FalseValue}); - binaryOp.replaceAllUsesWith(fmulzResult); - binaryOp.dropAllReferences(); - binaryOp.eraseFromParent(); - - m_changed = true; - return; - } + emitFFmulzInst(binaryOp); + } +} + +// ===================================================================================================================== +// Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: +// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 
0.0 : b)) +// @param binaryOp : Binary operator instruction +void SpirvLowerMathFloatOp::emitFFmulzInst(BinaryOperator &binaryOp) { + auto src1 = binaryOp.getOperand(0); + auto src2 = binaryOp.getOperand(1); + FastMathFlags fastMathFlags = binaryOp.getFastMathFlags(); + auto matchValue = isMulDx9Zero(src1, src2, fastMathFlags); + if (matchValue != std::nullopt) { + IRBuilder<> builder(*m_context); + builder.SetInsertPoint(&binaryOp); + builder.setFastMathFlags(binaryOp.getFastMathFlags()); + Value *transformSrc1 = matchValue->first; + Value *transformSrc2 = matchValue->second; + Value *fmulzResult = builder.CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {transformSrc1, transformSrc2}); + + m_changed = true; + binaryOp.replaceAllUsesWith(fmulzResult); + binaryOp.dropAllReferences(); + binaryOp.eraseFromParent(); + } +} + +// ===================================================================================================================== +// Replace fma with amdgcn_fma_legacy intrinsic when detect patterns like: +// fma((b==0.0 ? 0.0 : a), (a==0.0 ? 
0.0 : b), c) +// @param inst : Instruction to be replaced if needed +void SpirvLowerMathFloatOp::emitFFmazInst(Instruction *inst) { + assert(inst); + CallInst *fmaCallInst = dyn_cast(inst); + Value *src1 = fmaCallInst->getArgOperand(0); + Value *src2 = fmaCallInst->getArgOperand(1); + FastMathFlags fastMathFlags = inst->getFastMathFlags(); + auto matchValue = isMulDx9Zero(src1, src2, fastMathFlags); + if (matchValue != std::nullopt) { + IRBuilder<> builder(*m_context); + builder.SetInsertPoint(inst); + builder.setFastMathFlags(inst->getFastMathFlags()); + Value *transformSrc1 = matchValue->first; + Value *transformSrc2 = matchValue->second; + Value *src3 = fmaCallInst->getArgOperand(2); + Value *ffmazResult = + builder.CreateIntrinsic(Intrinsic::amdgcn_fma_legacy, {}, {transformSrc1, transformSrc2, src3}); + + m_changed = true; + inst->replaceAllUsesWith(ffmazResult); + inst->dropAllReferences(); + inst->eraseFromParent(); + } +} + +// ===================================================================================================================== +// Checks whether a multiply of lhs with rhs using the given fast-math flags can be transformed into a multiply +// with DX9 zero semantics. If so, returns a pair of operands for the new multiply. +// @param lhs : left operand for the operation +// @param rhs: right operand for the operation +// @param fastMathFlags: fastmath flags for the opreration +std::optional> SpirvLowerMathFloatOp::isMulDx9Zero(Value *lhs, Value *rhs, + FastMathFlags fastMathFlags) { + Value *lhsCmpValue = nullptr; + Value *lhsFalseValue = nullptr; + Value *rhsCmpValue = nullptr; + Value *rhsFalseValue = nullptr; + FCmpInst::Predicate pred = FCmpInst::FCMP_OEQ; + + // If the fast math flags might have INFs, when a = intf then a == 0 ? 0.0 : b is b and a * b = inf * 0 = nan + // This is incorrect so it needs to add related check here + if (!fastMathFlags.noInfs()) + return std::nullopt; + + // Only transform for float32. 
+ if (!(lhs->getType()->isFloatTy() && rhs->getType()->isFloatTy())) + return std::nullopt; + + // Detect whether A = (b==0.0 ? 0.0 : a) and parse out b and a + bool lhsMatch = + match(lhs, m_Select(m_FCmp(pred, m_Value(lhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(lhsFalseValue))); + // Detect whether B = (a'==0.0 ? 0.0 : b') and output a' and b' + bool rhsMatch = + match(rhs, m_Select(m_FCmp(pred, m_Value(rhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(rhsFalseValue))); + + // If b == b' && a == a' then use fmul_legacy(a,b) instead of fmul(A,B) + if (lhsMatch && rhsMatch && (lhsCmpValue == rhsFalseValue) && (rhsCmpValue == lhsFalseValue)) { + return std::make_pair(lhsFalseValue, rhsFalseValue); + } + if (lhsMatch && (lhsCmpValue == rhs)) { + if (auto *constLhsFalseValue = dyn_cast(lhsFalseValue); + constLhsFalseValue && !constLhsFalseValue->isZero()) { + // Detect pattern: ((b==0.0 ? 0.0 : a) * b) when a is constant but not zero. + return std::make_pair(lhsFalseValue, rhs); + } + } + if (rhsMatch && (lhs == rhsCmpValue)) { + if (auto *constRhsFalseValue = dyn_cast(rhsFalseValue); + constRhsFalseValue && !constRhsFalseValue->isZero()) { + // Detect pattern: (a * (a==0.0 ? 0.0 : b)) when b is constant but not zero. + return std::make_pair(lhs, rhsFalseValue); } } + return std::nullopt; } // ===================================================================================================================== @@ -579,6 +656,13 @@ void SpirvLowerMathFloatOp::visitCallInst(CallInst &callInst) { // NOTE: FABS will be optimized by backend compiler with sign bit removed via AND. flushDenormIfNeeded(&callInst); } + + // Replace fma with amdgcn_fma_legacy intrinsic when detect patterns like: + // fma((b==0.0 ? 0.0 : a), (a==0.0 ? 
0.0 : b), c) + auto mangledName = callee->getName(); + if (mangledName.startswith("lgc.create.fma")) { + emitFFmazInst(&callInst); + } } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerMath.h b/llpc/lower/llpcSpirvLowerMath.h index 200be86dc0..6b41e51d5c 100644 --- a/llpc/lower/llpcSpirvLowerMath.h +++ b/llpc/lower/llpcSpirvLowerMath.h @@ -47,7 +47,6 @@ class SpirvLowerMath : public SpirvLower { void init(llvm::Module &module); void flushDenormIfNeeded(llvm::Instruction *inst); - bool m_changed; // Whether the module is changed bool m_fp16DenormFlush; // Whether FP mode wants f16 denorms to be flushed to zero bool m_fp32DenormFlush; // Whether FP mode wants f32 denorms to be flushed to zero @@ -93,7 +92,10 @@ class SpirvLowerMathFloatOp : public SpirvLowerMath, virtual void visitBinaryOperator(llvm::BinaryOperator &binaryOp); virtual void visitCallInst(llvm::CallInst &callInst); virtual void visitFPTruncInst(llvm::FPTruncInst &fptruncInst); - + void emitFFmulzInst(llvm::BinaryOperator &binaryOp); + void emitFFmazInst(llvm::Instruction *inst); + std::optional> isMulDx9Zero(llvm::Value *lhs, llvm::Value *rhs, + llvm::FastMathFlags flags); static llvm::StringRef name() { return "Lower SPIR-V math floating point optimisation"; } }; diff --git a/llpc/lower/llpcSpirvLowerRayTracing.cpp b/llpc/lower/llpcSpirvLowerRayTracing.cpp index b95541eea6..b054559c20 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.cpp +++ b/llpc/lower/llpcSpirvLowerRayTracing.cpp @@ -1741,7 +1741,7 @@ Instruction *SpirvLowerRayTracing::createEntryFunc(Function *func) { assert((m_shaderStage == ShaderStageRayTracingIntersect) || (m_shaderStage == ShaderStageRayTracingAnyHit) || (m_shaderStage == ShaderStageRayTracingClosestHit)); func->getArg(1)->replaceAllUsesWith(m_traceParams[TraceParam::HitAttributes]); - setShaderHitAttributeSize(newFunc, getShaderHitAttributeSize(func)); + 
setShaderHitAttributeSize(newFunc, getShaderHitAttributeSize(func).value_or(0)); } // Transfer code from old entry function to the new entry function diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp index 37a5f8f1fb..a94af2b46a 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp @@ -43,6 +43,7 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #define DEBUG_TYPE "llpc-spirv-lower-gpurt-library" using namespace lgc; @@ -100,6 +101,7 @@ SpirvProcessGpuRtLibrary::LibraryFunctionTable::LibraryFunctionTable() { #else m_libFuncPtrs["AmdExtD3DShaderIntrinsics_IntersectInternal"] = &SpirvProcessGpuRtLibrary::createIntersectBvh; #endif + m_libFuncPtrs["AmdExtD3DShaderIntrinsics_ShaderMarker"] = &SpirvProcessGpuRtLibrary::createShaderMarker; m_libFuncPtrs["AmdExtD3DShaderIntrinsics_FloatOpWithRoundMode"] = &SpirvProcessGpuRtLibrary::createFloatOpWithRoundMode; m_libFuncPtrs["AmdExtDispatchThreadIdFlat"] = &SpirvProcessGpuRtLibrary::createDispatchThreadIdFlat; @@ -174,39 +176,6 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { assert(!getObjToWorldTrans.empty()); assert(!getWorldToObjTrans.empty()); - bool isAmdAwaitLike = funcName.starts_with("_AmdAwait") || funcName.starts_with("_AmdWaitAwait"); - if (funcName.starts_with("_cont_") || isAmdAwaitLike) { - func->setLinkage(GlobalValue::WeakAnyLinkage); - // Delete function body of _Amd*Await, it will be handled in LowerRaytracingPipeline. - if (isAmdAwaitLike) - func->deleteBody(); - - // The function might not have types metadata like _cont_SetupRayGen or _AmdAwait which is a declaration, nothing - // needs to be done. 
- if (!func->getMetadata(ContHelper::MDTypesName)) - return; - - SmallBitVector promotionMask(func->arg_size()); - for (unsigned argNo = 0; argNo < func->arg_size(); argNo++) { - auto *arg = func->getArg(argNo); - ContArgTy argTy = ContArgTy::get(func, arg); - auto funcName = func->getName(); - - if (!argTy.isPointerTy()) - continue; - - // Change the pointer type to its value type for non-struct types. - // Amd*Await, use value types for all arguments. - // For _cont_SetTriangleHitAttributes, we always use its value type for hitAttributes argument. - if (!isa(argTy.getPointerElementType()) || isAmdAwaitLike || - (funcName == ContDriverFunc::SetTriangleHitAttributesName && argNo == 1)) - promotionMask.set(argNo); - } - - promotePointerArguments(func, promotionMask); - return; - } - // Set external linkage for library entry functions if (funcName.starts_with(traceRayFuncName) || funcName.starts_with(rayQueryInitializeFuncName) || funcName.starts_with(rayQueryProceedFuncName) || @@ -242,14 +211,11 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { return; } else if (funcName.starts_with("_AmdGetUninitialized")) { m_builder->SetInsertPoint(clearBlock(func)); - m_builder->CreateRet(PoisonValue::get(func->getReturnType())); + Value *FrozenPoison = m_builder->CreateFreeze(PoisonValue::get(func->getReturnType())); + m_builder->CreateRet(FrozenPoison); return; - } else if (funcName.starts_with("_AmdGetShaderKind") || funcName.starts_with("_AmdGetCurrentFuncAddr") || - funcName.starts_with("_AmdGetResumePointAddr")) { - // These _Amd* functions are handled in later continuation transformations, delete the function body to preserve the - // call. - func->deleteBody(); - func->setLinkage(GlobalValue::WeakAnyLinkage); + } else if (funcName.starts_with("_AmdRestoreSystemData")) { + // We don't need this, leave it as dummy function so that it does nothing. 
return; } @@ -269,6 +235,58 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { auto funcPtr = commonFuncIt->second; m_builder->SetInsertPoint(clearBlock(func)); (*funcPtr)(func, m_builder); + return; + } + + bool isAmdAwaitLike = funcName.starts_with("_AmdAwait") || funcName.starts_with("_AmdWaitAwait"); + if (funcName.starts_with("_cont_") || funcName.starts_with("_Amd")) { + func->setLinkage(GlobalValue::WeakAnyLinkage); + + // Skip _AmdAwaitTraversal function resulting from calls to _AmdWaitAwaitTraversal. + if (!func->hasMetadata(ContHelper::MDTypesName) && !func->arg_empty()) + return; + + SmallBitVector promotionMask(func->arg_size()); + for (unsigned argNo = 0; argNo < func->arg_size(); argNo++) { + auto *arg = func->getArg(argNo); + ContArgTy argTy = ContArgTy::get(func, arg); + auto funcName = func->getName(); + + if (!argTy.isPointerTy()) + continue; + + // Change the pointer type to its value type for non-struct types. + // Amd*Await, use value types for all arguments. + // For _cont_SetTriangleHitAttributes, we always use its value type for hitAttributes argument. + if (!isa(argTy.getPointerElementType()) || isAmdAwaitLike || + (funcName == ContDriverFunc::SetTriangleHitAttributesName && argNo == 1)) + promotionMask.set(argNo); + } + + auto *newFunc = promotePointerArguments(func, promotionMask); + + // Delete function body of _Amd* intrinsics that survive here, they will be handled in LowerRaytracingPipeline. + if (funcName.starts_with("_Amd")) + newFunc->deleteBody(); + + if (newFunc->getName().starts_with("_AmdWaitAwait")) { + llvm::forEachCall(*newFunc, [&](CallInst &CInst) { + SmallVector args(CInst.args()); + // NOTE: Theoretically we should remove the wait mask so that the function signature matches + // _AmdAwait*(addr, returnAddr, SystemData, ...). 
However, _AmdWaitAwaitTraversal's arguments are defined as + // (addr, waitMask, SystemData, ...), thus we need to keep the waitMask as a dummy returnAddr so that + // LowerRaytracingPipeline can handle it correctly. + if (!newFunc->getName().starts_with("_AmdWaitAwaitTraversal")) + args.erase(args.begin() + 1); + + m_builder->SetInsertPoint(&CInst); + auto *newValue = m_builder->CreateNamedCall("_AmdAwait", CInst.getType(), args, {}); + CInst.replaceAllUsesWith(newValue); + CInst.eraseFromParent(); + }); + } + + return; } } @@ -608,19 +626,25 @@ Value *SpirvProcessGpuRtLibrary::createGetBvhSrd(llvm::Value *expansion, llvm::V // // @param func : The function to create void SpirvProcessGpuRtLibrary::createSampleGpuTimer(llvm::Function *func) { - Value *timerHiPtr = func->getArg(0); - Value *timerLoPtr = func->getArg(1); + if (func->arg_size() == 2) { + Value *timerHiPtr = func->getArg(0); + Value *timerLoPtr = func->getArg(1); - Value *const readClock = m_builder->CreateReadClock(true); - Value *clocksLo = m_builder->CreateAnd(readClock, m_builder->getInt64(UINT32_MAX)); - clocksLo = m_builder->CreateTrunc(clocksLo, m_builder->getInt32Ty()); - Value *clocksHi = m_builder->CreateLShr(readClock, m_builder->getInt64(32)); - clocksHi = m_builder->CreateTrunc(clocksHi, m_builder->getInt32Ty()); + Value *const readClock = m_builder->CreateReadClock(true); + Value *clocksLo = m_builder->CreateAnd(readClock, m_builder->getInt64(UINT32_MAX)); + clocksLo = m_builder->CreateTrunc(clocksLo, m_builder->getInt32Ty()); + Value *clocksHi = m_builder->CreateLShr(readClock, m_builder->getInt64(32)); + clocksHi = m_builder->CreateTrunc(clocksHi, m_builder->getInt32Ty()); - m_builder->CreateStore(clocksLo, timerLoPtr); - m_builder->CreateStore(clocksHi, timerHiPtr); + m_builder->CreateStore(clocksLo, timerLoPtr); + m_builder->CreateStore(clocksHi, timerHiPtr); - m_builder->CreateRetVoid(); + m_builder->CreateRetVoid(); + } else { + assert(func->arg_empty()); + Value *const readClock = 
m_builder->CreateReadClock(true); + m_builder->CreateRet(readClock); + } } // ===================================================================================================================== @@ -928,4 +952,14 @@ void SpirvProcessGpuRtLibrary::createGetRtip(llvm::Function *func) { m_builder->CreateRet(m_builder->getInt32(rtip.major * 10 + rtip.minor)); } +// ===================================================================================================================== +// Fill in function to write shader marker +// +// @param func : The function to create +void SpirvProcessGpuRtLibrary::createShaderMarker(llvm::Function *func) { + Value *dataPtr = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(0)); + m_builder->CreateIntrinsic(Intrinsic::amdgcn_s_ttracedata, {}, dataPtr); + m_builder->CreateRetVoid(); +} + } // namespace Llpc diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h index 07a32ab10e..fa5d864134 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.h +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.h @@ -100,6 +100,7 @@ class SpirvProcessGpuRtLibrary : public SpirvLower, public llvm::PassInfoMixin], { <4 x i32>, i32 } }, align 32, addrspace(5) -; SHADERTEST-NEXT: %_12 = alloca i64, align 8, addrspace(5) -; SHADERTEST-NEXT: [[TMP:%.*]] = load i64, ptr addrspace(64) @_4, align 8 -; SHADERTEST-NEXT: store i64 %1, ptr addrspace(5) %_12, align 8 -; SHADERTEST-NEXT: [[TMP:%.*]] = load i64, ptr addrspace(5) %_12, align 8 -; SHADERTEST-NEXT: call void @spirv.NonUniform.i64(i64 %2) -; SHADERTEST-NEXT: [[TMP:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP:%.*]] = call i32 (...) 
@lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, ptr addrspace(4) %3, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } %5, i32 %4, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } %6, i32 32, 2 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } %7, i32 1, 3 -; SHADERTEST-NEXT: [[TMP:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } zeroinitializer, ptr addrspace(4) %9, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = call i32 (...) @lgc.create.get.desc.stride.i32(i32 2, i32 2, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } %10, i32 %11, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } %8, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %13, { ptr addrspace(4), i32, i32 } %12, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %14, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %14, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } %15, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } %15, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = trunc i64 %2 to i32 -; SHADERTEST-NEXT: [[TMP:%.*]] = mul i32 %19, %18 -; SHADERTEST-NEXT: [[TMP:%.*]] = getelementptr i8, ptr addrspace(4) %17, i32 %20 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } %15, ptr addrspace(4) %21, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = 
extractvalue { ptr addrspace(4), i32, i32 } %16, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } %16, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = trunc i64 %2 to i32 -; SHADERTEST-NEXT: [[TMP:%.*]] = mul i32 %25, %24 -; SHADERTEST-NEXT: [[TMP:%.*]] = getelementptr i8, ptr addrspace(4) %23, i32 %26 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } %16, ptr addrspace(4) %27, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } %22, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %29, { ptr addrspace(4), i32, i32 } %28, 1 -; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %30) -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %30, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } %31, 2 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } %31, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = load <4 x i32>, ptr addrspace(4) %33, align 16, !invariant.load !4 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { <4 x i32>, i32 } poison, <4 x i32> %34, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { <4 x i32>, i32 } %35, i32 %32, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %30, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } %37, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = load <8 x i32>, ptr addrspace(4) %38, align 32, !invariant.load !4 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue [3 x <8 x i32>] poison, <8 x i32> %39, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } poison, [3 x <8 x 
i32>] %40, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } %41, { <4 x i32>, i32 } %36, 1 -; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } } %42) -; SHADERTEST-NEXT: store { [3 x <8 x i32>], { <4 x i32>, i32 } } %42, ptr addrspace(5) %0, align 32 -; SHADERTEST-NEXT: [[TMP:%.*]] = load { [3 x <8 x i32>], { <4 x i32>, i32 } }, ptr addrspace(5) %0, align 32 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } %43, 1 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } %43, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue [3 x <8 x i32>] %45, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = extractvalue { <4 x i32>, i32 } %44, 0 -; SHADERTEST-NEXT: [[TMP:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %46, <4 x i32> %47, i32 1, <2 x float> zeroinitializer) +; SHADERTEST-NEXT: [[TMP0:%.*]] = alloca { [3 x <8 x i32>], { <4 x i32>, i32 } }, align 32, addrspace(5) +; SHADERTEST-NEXT: [[_12:%.*]] = alloca i64, align 8, addrspace(5) +; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, ptr addrspace(4) [[TMP1]], 0 +; SHADERTEST-NEXT: [[TMP4:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP3]], i32 [[TMP2]], 1 +; SHADERTEST-NEXT: [[TMP5:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP4]], i32 32, 2 +; SHADERTEST-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP5]], i32 1, 3 +; SHADERTEST-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP8:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } zeroinitializer, ptr addrspace(4) [[TMP7]], 0 +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 (...) @lgc.create.get.desc.stride.i32(i32 2, i32 2, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP10:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } [[TMP8]], i32 [[TMP9]], 1 +; SHADERTEST-NEXT: [[TMP11:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } [[TMP6]], 0 +; SHADERTEST-NEXT: [[_11:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP11]], { ptr addrspace(4), i32, i32 } [[TMP10]], 1 +; SHADERTEST-NEXT: [[TMP12:%.*]] = load i64, ptr addrspace(64) @_4, align 8 +; SHADERTEST-NEXT: store i64 [[TMP12]], ptr addrspace(5) [[_12]], align 8 +; SHADERTEST-NEXT: [[TMP13:%.*]] = load i64, ptr addrspace(5) [[_12]], align 8 +; SHADERTEST-NEXT: call void @spirv.NonUniform.i64(i64 [[TMP13]]) +; SHADERTEST-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32 +; SHADERTEST-NEXT: [[TMP15:%.*]] = getelementptr [4294967295 x i8], ptr null, i32 0, i32 [[TMP14]] +; SHADERTEST-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[TMP15]] to i32 +; SHADERTEST-NEXT: [[TMP17:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[_11]], 0 +; SHADERTEST-NEXT: [[TMP18:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[_11]], 1 +; SHADERTEST-NEXT: [[TMP19:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], 0 +; SHADERTEST-NEXT: [[TMP20:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], 1 +; SHADERTEST-NEXT: [[TMP21:%.*]] = mul i32 [[TMP16]], [[TMP20]] +; SHADERTEST-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP19]], i32 [[TMP21]] +; SHADERTEST-NEXT: [[TMP23:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], ptr 
addrspace(4) [[TMP22]], 0 +; SHADERTEST-NEXT: [[TMP24:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP18]], 0 +; SHADERTEST-NEXT: [[TMP25:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP18]], 1 +; SHADERTEST-NEXT: [[TMP26:%.*]] = mul i32 [[TMP16]], [[TMP25]] +; SHADERTEST-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP24]], i32 [[TMP26]] +; SHADERTEST-NEXT: [[TMP28:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } [[TMP18]], ptr addrspace(4) [[TMP27]], 0 +; SHADERTEST-NEXT: [[TMP29:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } [[TMP23]], 0 +; SHADERTEST-NEXT: [[TMP30:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP29]], { ptr addrspace(4), i32, i32 } [[TMP28]], 1 +; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]]) +; SHADERTEST-NEXT: [[TMP31:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 1 +; SHADERTEST-NEXT: [[TMP32:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 2 +; SHADERTEST-NEXT: [[TMP33:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 0 +; SHADERTEST-NEXT: [[TMP34:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP33]], align 16, !invariant.load !4 +; SHADERTEST-NEXT: [[TMP35:%.*]] = insertvalue { <4 x i32>, i32 } poison, <4 x i32> [[TMP34]], 0 +; SHADERTEST-NEXT: [[TMP36:%.*]] = insertvalue { <4 x i32>, i32 } [[TMP35]], i32 [[TMP32]], 1 +; SHADERTEST-NEXT: [[TMP37:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 0 +; SHADERTEST-NEXT: [[TMP38:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP37]], 0 +; SHADERTEST-NEXT: [[TMP39:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP38]], align 32, !invariant.load !4 +; SHADERTEST-NEXT: 
[[TMP40:%.*]] = insertvalue [3 x <8 x i32>] poison, <8 x i32> [[TMP39]], 0 +; SHADERTEST-NEXT: [[TMP41:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } poison, [3 x <8 x i32>] [[TMP40]], 0 +; SHADERTEST-NEXT: [[TMP42:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP41]], { <4 x i32>, i32 } [[TMP36]], 1 +; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP42]]) +; SHADERTEST-NEXT: store { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP42]], ptr addrspace(5) [[TMP0]], align 32 +; SHADERTEST-NEXT: [[TMP43:%.*]] = load { [3 x <8 x i32>], { <4 x i32>, i32 } }, ptr addrspace(5) [[TMP0]], align 32 +; SHADERTEST-NEXT: [[TMP44:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP43]], 1 +; SHADERTEST-NEXT: [[TMP45:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP43]], 0 +; SHADERTEST-NEXT: [[TMP46:%.*]] = extractvalue [3 x <8 x i32>] [[TMP45]], 0 +; SHADERTEST-NEXT: [[TMP47:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP44]], 0 +; SHADERTEST-NEXT: [[TMP48:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> [[TMP46]], <4 x i32> [[TMP47]], i32 1, <2 x float> zeroinitializer) diff --git a/llpc/test/shaderdb/core/OpCopyMemory_TestStruct_lit.spvasm b/llpc/test/shaderdb/core/OpCopyMemory_TestStruct_lit.spvasm index 8b6a55f483..d2c5773884 100644 --- a/llpc/test/shaderdb/core/OpCopyMemory_TestStruct_lit.spvasm +++ b/llpc/test/shaderdb/core/OpCopyMemory_TestStruct_lit.spvasm @@ -35,10 +35,10 @@ ; SHADERTEST: store <4 x float> ; SHADERTEST-LABEL: {{^// LLPC.*}} patching results -; SHADERTEST: @llvm.amdgcn.raw.buffer.load.v4i32 -; SHADERTEST: @llvm.amdgcn.raw.buffer.load.v4i32 -; SHADERTEST: @llvm.amdgcn.raw.buffer.load.v4i32 -; SHADERTEST: @llvm.amdgcn.raw.buffer.load.v4i32 +; SHADERTEST: @llvm.amdgcn.s.buffer.load.v4i32 +; SHADERTEST: @llvm.amdgcn.s.buffer.load.v4i32 +; SHADERTEST: @llvm.amdgcn.s.buffer.load.v4i32 +; SHADERTEST: @llvm.amdgcn.s.buffer.load.v4i32 ; SHADERTEST: @llvm.amdgcn.raw.buffer.store.v4i32 ; SHADERTEST: @llvm.amdgcn.raw.buffer.store.v4i32 ; SHADERTEST: @llvm.amdgcn.raw.buffer.store.v4i32 diff --git a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm index af7cb22464..ff8682c708 100644 --- a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm +++ b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm @@ -3,7 +3,9 @@ ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results -; SHADERTEST: call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) +; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) +; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) +; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST @@ -13,6 +15,7 @@ ; Bound: 28 ; Schema: 0 
OpCapability Shader + OpCapability Kernel %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %main "main" %b %a %fragColor @@ -26,6 +29,9 @@ OpDecorate %b Location 1 OpDecorate %a Location 0 OpDecorate %fragColor Location 0 + OpDecorate %22 FPFastMathMode NotInf + OpDecorate %32 FPFastMathMode NotInf + OpDecorate %38 FPFastMathMode NotInf %void = OpTypeVoid %3 = OpTypeFunction %void %float = OpTypeFloat 32 @@ -33,6 +39,8 @@ %_ptr_Input_float = OpTypePointer Input %float %b = OpVariable %_ptr_Input_float Input %float_0 = OpConstant %float 0 + %float_05 = OpConstant %float 0.5 + %float_15 = OpConstant %float 1.5 %bool = OpTypeBool %a = OpVariable %_ptr_Input_float Input %v4float = OpTypeVector %float 4 @@ -50,7 +58,17 @@ %20 = OpLoad %float %b %21 = OpSelect %float %19 %float_0 %20 %22 = OpFMul %float %17 %21 - OpStore %c %22 + %28 = OpLoad %float %b + %29 = OpFOrdEqual %bool %28 %float_0 + %31 = OpSelect %float %29 %float_0 %float_05 + %32 = OpFMul %float %28 %31 + %33 = OpFAdd %float %22 %32 + %34 = OpLoad %float %a + %35 = OpFOrdEqual %bool %34 %float_0 + %37 = OpSelect %float %35 %float_0 %float_15 + %38 = OpFMul %float %34 %37 + %39 = OpFAdd %float %33 %38 + OpStore %c %39 %26 = OpLoad %float %c %27 = OpCompositeConstruct %v4float %26 %26 %26 %26 OpStore %fragColor %27 diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag index 3bf68680fb..8908a82ed0 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag @@ -27,14 +27,14 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, i32 801, <2 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FECCCCCC0000000) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, i32 801, <3 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FE99999A0000000) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, {{.*}}, i32 801, <2 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag index 169bbd28b9..77074845cf 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag @@ -25,14 +25,14 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FECCCCCC0000000) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, i32 545, <3 x float> , float 0.000000e+00, float 0x3FE99999A0000000) -; SHADERTEST: ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestIntegerSampler_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestIntegerSampler_lit.frag index 59d2859894..6fc0d6d679 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestIntegerSampler_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestIntegerSampler_lit.frag @@ -16,14 +16,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 1, i32 1540, {{.*}}, <2 x i32> , i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.load.v4i32(i32 1, i32 1536, {{.*}}, <2 x i32> , i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call <4 x i32> @llvm.amdgcn.image.load.2d.v4i32.i16(i32 15, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !11 -; SHADERTEST: call <4 x i32> @llvm.amdgcn.image.load.2d.v4i32.i16(i32 15, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !11 +; SHADERTEST: call <4 x i32> @llvm.amdgcn.image.load.2d.v4i32.i16(i32 15, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} +; SHADERTEST: call <4 x i32> @llvm.amdgcn.image.load.2d.v4i32.i16(i32 15, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag index a9387e65dd..14eb182981 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag @@ -37,25 +37,25 @@ void main() ; SHADERTEST: {{.*}} OpImageFetch {{.*}} Lod|ConstOffset {{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 6, i32 3) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 128, {{.*}}, <2 x i32> , i32 6) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 2, i32 1536, {{.*}}, <3 x i32> , i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 9, i32 1536, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 4, i32 1536, {{.*}}, <2 x i32> , i32 6) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 5, i32 128, {{.*}}, <3 x i32> , i32 2) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 6, i16 3, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !18 +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 6, i16 3, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 12, i16 12, i16 6, <8 x i32> %{{.*}}, i32 0, i32 0) -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 4, i16 4, i16 4, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !18 -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 9, i16 9, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !18 -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 12, i16 5, i16 6, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !18 +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 4, i16 4, i16 4, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 9, i16 9, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 12, i16 5, i16 6, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 4, i16 4, i16 1, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag index 23dab8f55b..78d7897576 100644 --- 
a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag @@ -28,21 +28,21 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 4, i32 4, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 2, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 128, {{.*}}, <2 x i32> , i32 8) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 9, i32 1536, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 4, i32 4, i64 0, i32 2 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 5) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.with.fmask.v4f32(i32 6, i32 128, {{.*}}, {{.*}}, <2 x i32> , i32 4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 2, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !16 +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 2, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 7, i16 7, i16 8, <8 x i32> %{{.*}}, i32 0, i32 0) -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 3, i16 3, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !16 +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 3, i16 3, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32({{.*}}, i32 5, i32 0, i32 0, i32 0), !invariant.load ; SHADERTEST: call i32 @llvm.amdgcn.image.load.2d.i32.i16(i32 1, i16 6, i16 6, <8 x i32> %{{.*}}, i32 0, i32 0) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 6, i32 6,{{.*}},{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag b/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag index f8119bbeef..97be87bb57 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag @@ -21,13 +21,13 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; 
SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) ; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag index 35be645e56..7c7805cc3f 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag @@ -53,17 +53,17 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <2 x {{.*}}, i32 0, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <3 x {{.*}}, i32 1, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 2 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 3, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <3 x {{.*}}, i32 2, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 3 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 8, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <4 x {{.*}}, i32 3, {{.*}}) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <2 x {{.*}}, i32 0, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <3 x {{.*}}, i32 1, {{.*}}, <2 x i32> ) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag index 5ab82c6ac9..584be47256 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag @@ -25,14 +25,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, <2 x {{.*}}) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, <8 x {{.*}}, <4 x {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, <2 x {{.*}}) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag index b517320d60..cb23486ca6 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag @@ -26,14 +26,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, {{.*}}, {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag index 204c04f047..5134dadc01 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag @@ -24,14 +24,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 2, float 0.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, <8 x {{.*}}, <4 x {{.*}}, i32 37, <3 x float> , i32 3, float 0.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQueryLevels_TestBasic_lit.comp b/llpc/test/shaderdb/core/OpImageQueryLevels_TestBasic_lit.comp index e7476ac118..04e5e0500b 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLevels_TestBasic_lit.comp +++ b/llpc/test/shaderdb/core/OpImageQueryLevels_TestBasic_lit.comp @@ -47,32 +47,32 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 12 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 11 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 10 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7 
+; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 0, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 0, i32 512, {{.*}}) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 2, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 3, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 4, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 5, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7 ; SHADERTEST: call i32 (...) 
@lgc.create.image.query.levels.i32(i32 0, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 3, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 10 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 4, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 11 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 5, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 12 ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 512, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag index ceb9067429..403ee311ff 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag @@ -26,13 +26,13 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call i32 (...) 
@lgc.create.image.query.levels.i32(i32 0, i32 512, <8 x {{.*}}) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x {{.*}}) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, <8 x {{.*}}) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 128, <8 x {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQueryLod_TestBasic_lit.frag b/llpc/test/shaderdb/core/OpImageQueryLod_TestBasic_lit.frag index 65f7ba6033..a6b5b231f4 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLod_TestBasic_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQueryLod_TestBasic_lit.frag @@ -46,44 +46,44 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 12) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 12) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 11) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 11) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 10) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 10) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 9) +; SHADERTEST: ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 9) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 8) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 8) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 7) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 7) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 6) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 6) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 5) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 4) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 3) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 2) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) 
@lgc.create.image.get.lod.v2f32(i32 0, i32 512, {{.*}}, {{.*}}, float 7.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 1) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 512, {{.*}}, {{.*}}, <2 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 2) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 2) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 2, i32 512, {{.*}}, {{.*}}, <3 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 3) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 3) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 3, i32 512, {{.*}}, {{.*}}, <3 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 4) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 4) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 4, i32 512, {{.*}}, {{.*}}, float 7.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 5) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 5) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 5, i32 512, {{.*}}, {{.*}}, <2 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 6) -; SHADERTEST: ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 6) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 8, i32 512, {{.*}}, {{.*}}, <3 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 7) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 7) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 0, i32 512, {{.*}}, {{.*}}, float 7.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 8) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 8) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 512, {{.*}}, {{.*}}, <2 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 9) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 9) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 3, i32 512, {{.*}}, {{.*}}, <3 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 10) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 10) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 4, i32 512, {{.*}}, {{.*}}, float 7.000000e+00) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 11) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 11) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) 
@lgc.create.image.get.lod.v2f32(i32 5, i32 512, {{.*}}, {{.*}}, <2 x float> ) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 12) -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 12) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 8, i32 512, {{.*}}, {{.*}}, <3 x float> ) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/core/OpImageQuerySamples_TestBasic_lit.comp b/llpc/test/shaderdb/core/OpImageQuerySamples_TestBasic_lit.comp index d6c54f884f..23cefe845d 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySamples_TestBasic_lit.comp +++ b/llpc/test/shaderdb/core/OpImageQuerySamples_TestBasic_lit.comp @@ -23,9 +23,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call i32 (...) @lgc.create.image.query.samples.i32(i32 6, i32 512, -; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call i32 (...) 
@lgc.create.image.query.samples.i32(i32 7, i32 512, ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag index 420b64548a..84befc8fa2 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag @@ -22,9 +22,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 6, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 128, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag index 104937596b..8f86f670f1 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag @@ -22,9 +22,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 6, i32 512, {{.*}}) -; SHADERTEST: call 
{{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 128, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag index a9e92451a6..2340035249 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag @@ -23,11 +23,11 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 128, {{.*}}, i32 3) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 512, {{.*}}, i32 4) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 2, i32 512, {{.*}}, i32 5) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag index a4f4802ad4..483ef81f1d 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag @@ -78,36 +78,36 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; 
SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 17) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 16) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 12) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 11) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 10) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 2, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 3, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 
4) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 3, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 10) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 11) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 4, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 12) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 5, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 16) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 17) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 512, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results 
diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag index 72a558958f..9a00841e7e 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag @@ -29,14 +29,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 128, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 128, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp b/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp index 4fdc998a09..fd99503fae 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp @@ -40,25 +40,25 @@ void main() ; RUN: amdllpc -v %gfxip 
%s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 2, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 3, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 4, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 5, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 8) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 9) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag index ba06a2813c..6c0b3a9485 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag @@ -25,12 +25,12 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 128, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 
0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 128, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageReadWrite_TestImageLoadStoreLod_lit.comp b/llpc/test/shaderdb/core/OpImageReadWrite_TestImageLoadStoreLod_lit.comp index 63d0aca03e..36e5dbefc1 100644 --- a/llpc/test/shaderdb/core/OpImageReadWrite_TestImageLoadStoreLod_lit.comp +++ b/llpc/test/shaderdb/core/OpImageReadWrite_TestImageLoadStoreLod_lit.comp @@ -34,19 +34,19 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 0, i32 516, {{.*}}, i32 9, i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 1, i32 516, {{.*}}, <2 x i32> , i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 2, i32 516, {{.*}}, <3 x i32> , i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 3, i32 516, {{.*}}, <3 x i32> , i32 7) -; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 4, i32 516, {{.*}}, <2 x i32> , i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 5, i32 516, {{.*}}, <3 x i32> , i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 8, i32 516, {{.*}}, <4 x i32> , i32 7) ; SHADERTEST: call {{.*}} @lgc.create.image.store({{.*}}, i32 0, i32 516, {{.*}}, i32 9, i32 7) ; SHADERTEST: call {{.*}} @lgc.create.image.store({{.*}}, i32 1, i32 516, {{.*}}, <2 x i32> , i32 7) diff --git a/llpc/test/shaderdb/core/OpImageRead_Test2DMS_lit.comp b/llpc/test/shaderdb/core/OpImageRead_Test2DMS_lit.comp index 40ff2f73e3..86e3e21d7e 100644 --- a/llpc/test/shaderdb/core/OpImageRead_Test2DMS_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_Test2DMS_lit.comp @@ -20,9 +20,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 6, i32 512, {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 7, i32 512, {{.*}}, <4 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestBasic_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestBasic_lit.comp index dfbd41ff24..16ed00e2c1 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestBasic_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestBasic_lit.comp @@ -27,17 
+27,17 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 1, i32 512, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 0, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 2, i32 512, {{.*}}, <3 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 4, i32 512, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 5, i32 512, {{.*}}, <3 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 9, i32 512, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestCube_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestCube_lit.comp index 788a6faf65..f86cc7124f 100644 --- 
a/llpc/test/shaderdb/core/OpImageRead_TestCube_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestCube_lit.comp @@ -20,9 +20,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 3, i32 512, {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 8, i32 512, {{.*}}, <4 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag index 54bf61691a..3f38b36347 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag +++ b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag @@ -29,14 +29,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 0, i32 512, {{.*}}, i32 1) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 9, i32 512, {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 
0, i32 128, {{.*}}, i32 4) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 8, i32 128, {{.*}}, <4 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 6, i32 512, {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestIntImage_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestIntImage_lit.comp index 88e3d57e44..58a98840b4 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestIntImage_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestIntImage_lit.comp @@ -20,9 +20,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 1, i32 516, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 1, i32 512, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp index 9d3af421f1..226e84d136 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp @@ -23,13 +23,13 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V 
lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 513, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 1, i32 515, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageRead_TestNonVec4Data_lit.spvasm b/llpc/test/shaderdb/core/OpImageRead_TestNonVec4Data_lit.spvasm index db13bc0833..b4a50e27b2 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestNonVec4Data_lit.spvasm +++ b/llpc/test/shaderdb/core/OpImageRead_TestNonVec4Data_lit.spvasm @@ -3,15 +3,15 @@ ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.i32(i32 1, i32 512, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v2i32(i32 1, i32 512, {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v3i32(i32 1, i32 512, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.i32(i32 1, i32 512, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v2i32(i32 1, i32 512, {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v3i32(i32 1, i32 512, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.load.f32(i32 1, i32 512, {{.*}}, <2 x i32> zeroinitializer) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v2f32(i32 1, i32 512, {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v3f32(i32 1, i32 512, {{.*}}, <2 x i32> ) diff --git 
a/llpc/test/shaderdb/core/OpImageRead_TestSubpassInput_lit.frag b/llpc/test/shaderdb/core/OpImageRead_TestSubpassInput_lit.frag index a0d4c08395..6ba114d321 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestSubpassInput_lit.frag +++ b/llpc/test/shaderdb/core/OpImageRead_TestSubpassInput_lit.frag @@ -26,17 +26,17 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 1, i32 608, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.with.fmask.v4f32(i32 6, i32 608, {{.*}}, {{.*}}, <2 x i32> zeroinitializer, i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 1, i32 612, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.load.with.fmask.v4i32(i32 6, i32 612, {{.*}}, {{.*}}, <2 x i32> zeroinitializer, i32 7) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4i32(i32 1, i32 608, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, 
i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.image.load.with.fmask.v4i32(i32 6, i32 608, {{.*}}, {{.*}}, <2 x i32> zeroinitializer, i32 7) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag index f0ade20728..9257edf333 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag @@ -37,19 +37,17 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> , <3 x float> , <3 x float> , {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 
0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> , <3 x float> , <3 x float> , {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag index 4456a4b971..20588ab0bd 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag @@ -23,15 +23,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 281, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 281, <2 x float> , <2 x float> , <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 281, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 281, <2 x float> , <2 x float> , <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag index e1e95c4ed3..008c58de26 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag @@ -23,15 +23,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 25, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 25, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00) -; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) ; SHADERTEST-LABEL: pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag index 0340844c44..c09ef5e539 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag @@ -23,15 +23,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 289, float 5.000000e-01, float 0x3FD99999A0000000, i32 6) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 289, float 5.000000e-01, float 0x3FD99999A0000000, i32 6) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching 
results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag index 400bce25ce..cd68490b42 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag @@ -23,15 +23,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 33, float 5.000000e-01, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 33, float 5.000000e-01, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag index d3e4c52bb2..5bddde6b7a 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag +++ 
b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag @@ -20,9 +20,9 @@ void main() ; SHADERTEST: call <4 x i32> (...) @lgc.create.image.sample.v4i32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4i32(i32 1, i32 516, {{.*}}, {{.*}}, i32 1, <2 x float> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4i32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag index 37b558afde..e141adff10 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag @@ -42,35 +42,35 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: 
call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 193, <4 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, 
i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 193, <4 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag index 7c8a27253a..002bcc9bf2 100644 --- 
a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag @@ -42,35 +42,35 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} 
@lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 129, <4 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, 
i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 129, <4 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag index 5e096d2072..add49732f6 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag @@ -44,35 +44,35 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call 
{{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 153, <4 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, 
{{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 6, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 153, <4 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag index f7062a9ea0..149360a1f3 100644 --- 
a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag @@ -37,27 +37,27 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results 
+; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag index 982ad556e9..761e8f8e83 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag +++ 
b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag @@ -37,27 +37,27 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) +; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <3 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 4, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 5, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag index 6330a4b81d..1b6c64e883 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag @@ -22,15 +22,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 321, float 
1.000000e+00, float 0x3FD99999A0000000, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 321, float 1.000000e+00, float 0x3FD99999A0000000, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag index 17e2177b70..380f48a887 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag @@ -23,15 +23,15 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 65, float 1.000000e+00, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} 
@lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 65, float 1.000000e+00, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSample_TestSeparateSampler_lit.frag b/llpc/test/shaderdb/core/OpImageSample_TestSeparateSampler_lit.frag index b49cd5f077..71ecedf341 100644 --- a/llpc/test/shaderdb/core/OpImageSample_TestSeparateSampler_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSample_TestSeparateSampler_lit.frag @@ -14,15 +14,15 @@ void main() { ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, 
{{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageWrite_TestIntImage.comp b/llpc/test/shaderdb/core/OpImageWrite_TestIntImage.comp index bf9aaa5db4..6575793783 100644 --- a/llpc/test/shaderdb/core/OpImageWrite_TestIntImage.comp +++ b/llpc/test/shaderdb/core/OpImageWrite_TestIntImage.comp @@ -18,6 +18,7 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST: call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %0) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpImageWrite_TestIntImage_Aliased.spvasm b/llpc/test/shaderdb/core/OpImageWrite_TestIntImage_Aliased.spvasm new file mode 100644 index 0000000000..f2be24abc7 --- /dev/null +++ b/llpc/test/shaderdb/core/OpImageWrite_TestIntImage_Aliased.spvasm @@ -0,0 +1,64 @@ + +; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results +; SHADERTEST-NOT: call ptr @llvm.invariant.start.p7 +; SHADERTEST: AMDLLPC SUCCESS + +; SPIR-V +; Version: 1.6 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %img2Di %_ %img2Du + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %img2Di "img2Di" + OpName %BO "BO" + OpMemberName %BO 0 "mIn" + OpName %_ "" + OpName %img2Du "img2Du" + OpDecorate %img2Di DescriptorSet 0 + OpDecorate %img2Di Binding 0 + OpMemberDecorate %BO 0 Offset 0 + OpDecorate %BO Block + OpDecorate %_ DescriptorSet 1 + OpDecorate %_ Binding 0 + OpDecorate %_ Aliased + OpDecorate %img2Du DescriptorSet 0 + OpDecorate %img2Du Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %7 = OpTypeImage %int 2D 0 0 0 
2 Rgba8i +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %img2Di = OpVariable %_ptr_UniformConstant_7 UniformConstant + %v2int = OpTypeVector %int 2 + %int_0 = OpConstant %int 0 + %13 = OpConstantComposite %v2int %int_0 %int_0 + %v4int = OpTypeVector %int 4 + %BO = OpTypeStruct %v4int +%_ptr_StorageBuffer_BO = OpTypePointer StorageBuffer %BO + %_ = OpVariable %_ptr_StorageBuffer_BO StorageBuffer +%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int + %uint = OpTypeInt 32 0 + %22 = OpTypeImage %uint 2D 0 0 0 2 Rgba8ui +%_ptr_UniformConstant_22 = OpTypePointer UniformConstant %22 + %img2Du = OpVariable %_ptr_UniformConstant_22 UniformConstant + %v4uint = OpTypeVector %uint 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %10 = OpLoad %7 %img2Di + %19 = OpAccessChain %_ptr_StorageBuffer_v4int %_ %int_0 + %20 = OpLoad %v4int %19 + OpImageWrite %10 %13 %20 SignExtend + %25 = OpLoad %22 %img2Du + %26 = OpAccessChain %_ptr_StorageBuffer_v4int %_ %int_0 + %27 = OpLoad %v4int %26 + %29 = OpBitcast %v4uint %27 + OpImageWrite %25 %13 %29 ZeroExtend + OpReturn + OpFunctionEnd diff --git a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag index 8b39400fa1..8868c4c22e 100644 --- a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag @@ -28,15 +28,16 @@ void main() // SHADERTEST-LABEL: @lgc.shader.FS.main( // SHADERTEST-NEXT: .entry: // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) -// SHADERTEST-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds {{i8|<{ i32, [[]4 x i8], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2, i32 0}} -// SHADERTEST-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(7) [[TMP2]], align 4 -// SHADERTEST-NEXT: [[TMP4:%.*]] = icmp eq i32 
[[TMP1]], [[TMP3]] -// SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, [[]4 x i8], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{12|0, i32 2, i32 1}} -// SHADERTEST-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 -// SHADERTEST-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP1]], [[TMP6]] -// SHADERTEST-NEXT: [[TMP8:%.*]] = and i1 {{%4|%7}}, {{%7|%4}} -// SHADERTEST-NEXT: [[TMP9:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP8]], <4 x float> zeroinitializer, <4 x float> -// SHADERTEST-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[TMP9]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) +// SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +// SHADERTEST-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 8 +// SHADERTEST-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 +// SHADERTEST-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP2]], [[TMP4]] +// SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 12 +// SHADERTEST-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(7) [[TMP6]], align 4 +// SHADERTEST-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP2]], [[TMP7]] +// SHADERTEST-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[TMP5]] +// SHADERTEST-NEXT: [[TMP10:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP9]], <4 x float> zeroinitializer, <4 x float> +// SHADERTEST-NEXT: call void (...) 
@lgc.create.write.generic.output(<4 x float> [[TMP10]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) // SHADERTEST-NEXT: ret void // diff --git a/llpc/test/shaderdb/core/OpPhi_Switch_FunctionCall_Phi.spvasm b/llpc/test/shaderdb/core/OpPhi_Switch_FunctionCall_Phi.spvasm new file mode 100644 index 0000000000..7b2dc25a3d --- /dev/null +++ b/llpc/test/shaderdb/core/OpPhi_Switch_FunctionCall_Phi.spvasm @@ -0,0 +1,128 @@ +; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results +; SHADERTEST: AMDLLPC SUCCESS +; END_SHADERTEST + +; SPIR-V +; Version: 1.6 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 80 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ %color %envCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %foo_vf2_b1_ "foo(vf2;b1;" + OpName %coord "coord" + OpName %cond "cond" + OpName %f4 "f4" + OpName %Uniforms "Uniforms" + OpMemberName %Uniforms 0 "i" + OpName %_ "" + OpName %j "j" + OpName %color "color" + OpName %envCoord "envCoord" + OpName %param "param" + OpMemberDecorate %Uniforms 0 Offset 0 + OpDecorate %Uniforms Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %color Location 0 + OpDecorate %envCoord Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %bool = OpTypeBool + %10 = OpTypeFunction %v2float %_ptr_Function_v2float %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_1 = OpConstant %float 1 + %uint_1 = OpConstant %uint 1 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_0 = OpConstant %float 0 + %37 = OpConstantComposite %v4float 
%float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %Uniforms = OpTypeStruct %int +%_ptr_Uniform_Uniforms = OpTypePointer Uniform %Uniforms + %_ = OpVariable %_ptr_Uniform_Uniforms Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%_ptr_Function_int = OpTypePointer Function %int + %int_9 = OpConstant %int 9 + %int_90 = OpConstant %int 90 +%_ptr_Output_v2float = OpTypePointer Output %v2float + %color = OpVariable %_ptr_Output_v2float Output +%_ptr_Input_v2float = OpTypePointer Input %v2float + %envCoord = OpVariable %_ptr_Input_v2float Input + %false = OpConstantFalse %bool + %int_20 = OpConstant %int 20 +%_ptr_Output_float = OpTypePointer Output %float + %float_2 = OpConstant %float 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %f4 = OpVariable %_ptr_Function_v4float Function + %j = OpVariable %_ptr_Function_int Function + %param = OpVariable %_ptr_Function_v2float Function + OpStore %f4 %37 + %44 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %45 = OpLoad %int %44 + OpSelectionMerge %48 None + OpSwitch %45 %48 0 %46 1 %47 2 %48 + %47 = OpLabel + %57 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %58 = OpLoad %int %57 + %59 = OpIAdd %int %int_90 %58 + %66 = OpLoad %v2float %envCoord + OpStore %param %66 + %67 = OpFunctionCall %v2float %foo_vf2_b1_ %param %false + OpStore %color %67 + OpBranch %48 + %46 = OpLabel + %52 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %53 = OpLoad %int %52 + %54 = OpIAdd %int %int_9 %53 + OpBranch %48 + %48 = OpLabel + %70 = OpPhi %int %59 %47 %54 %46 %45 %5 + %72 = OpSGreaterThan %bool %70 %int_20 + OpSelectionMerge %74 None + OpBranchConditional %72 %73 %77 + %73 = OpLabel + %76 = OpAccessChain %_ptr_Output_float %color %uint_0 + OpStore %76 %float_1 + OpBranch %74 + %77 = OpLabel + %79 = OpAccessChain %_ptr_Output_float %color %uint_0 + OpStore %79 %float_2 + OpBranch %74 + %74 = OpLabel + OpReturn + OpFunctionEnd +%foo_vf2_b1_ = OpFunction %v2float None %10 + %coord = 
OpFunctionParameter %_ptr_Function_v2float + %cond = OpFunctionParameter %bool + %14 = OpLabel + OpSelectionMerge %16 None + OpBranchConditional %cond %15 %29 + %15 = OpLabel + %20 = OpAccessChain %_ptr_Function_float %coord %uint_0 + %21 = OpLoad %float %20 + %24 = OpAccessChain %_ptr_Function_float %coord %uint_1 + %25 = OpLoad %float %24 + %26 = OpFSub %float %float_1 %25 + %27 = OpCompositeConstruct %v2float %21 %26 + OpReturnValue %27 + %29 = OpLabel + %30 = OpLoad %v2float %coord + OpReturnValue %30 + %16 = OpLabel + OpUnreachable + OpFunctionEnd + diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag index 59b52df5ae..e1576ee13c 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag @@ -42,25 +42,26 @@ void main() // SHADERTEST-LABEL: @lgc.shader.FS.main( // SHADERTEST-NEXT: .entry: // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) -// SHADERTEST-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(7) [[TMP0]], align 16 -// SHADERTEST-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> -// SHADERTEST-NEXT: [[TMP3:%.*]] = fptrunc <2 x float> [[TMP2]] to <2 x half> -// SHADERTEST-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> -// SHADERTEST-NEXT: [[TMP5:%.*]] = fptrunc <2 x float> [[TMP4]] to <2 x half> -// SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x half> [[TMP3]], i64 0 -// SHADERTEST-NEXT: [[TMP7:%.*]] = extractelement <2 x half> [[TMP5]], i64 0 -// SHADERTEST-NEXT: [[TMP8:%.*]] = fcmp oeq half [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP9:%.*]] = fcmp une half [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -// SHADERTEST-NEXT: [[TMP11:%.*]] = fcmp olt half 
[[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -// SHADERTEST-NEXT: [[TMP13:%.*]] = fcmp ogt half [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] -// SHADERTEST-NEXT: [[TMP15:%.*]] = fcmp ole half [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] -// SHADERTEST-NEXT: [[TMP17:%.*]] = fcmp oge half [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] -// SHADERTEST-NEXT: [[TMP19:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP18]], <3 x float> , <3 x float> zeroinitializer -// SHADERTEST-NEXT: call void (...) @lgc.create.write.generic.output(<3 x float> [[TMP19]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) +// SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +// SHADERTEST-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr addrspace(7) [[TMP0]], align 16 +// SHADERTEST-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> +// SHADERTEST-NEXT: [[TMP4:%.*]] = fptrunc <2 x float> [[TMP3]] to <2 x half> +// SHADERTEST-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <2 x i32> +// SHADERTEST-NEXT: [[TMP6:%.*]] = fptrunc <2 x float> [[TMP5]] to <2 x half> +// SHADERTEST-NEXT: [[TMP7:%.*]] = extractelement <2 x half> [[TMP4]], i64 0 +// SHADERTEST-NEXT: [[TMP8:%.*]] = extractelement <2 x half> [[TMP6]], i64 0 +// SHADERTEST-NEXT: [[TMP9:%.*]] = fcmp oeq half [[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP10:%.*]] = fcmp une half [[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] +// SHADERTEST-NEXT: [[TMP12:%.*]] = fcmp olt half [[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] +// SHADERTEST-NEXT: [[TMP14:%.*]] = fcmp ogt half [[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP15:%.*]] = or i1 [[TMP13]], [[TMP14]] +// SHADERTEST-NEXT: [[TMP16:%.*]] = fcmp ole half 
[[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] +// SHADERTEST-NEXT: [[TMP18:%.*]] = fcmp oge half [[TMP7]], [[TMP8]] +// SHADERTEST-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] +// SHADERTEST-NEXT: [[TMP20:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP19]], <3 x float> , <3 x float> zeroinitializer +// SHADERTEST-NEXT: call void (...) @lgc.create.write.generic.output(<3 x float> [[TMP20]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) // SHADERTEST-NEXT: ret void // diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag index ededdfb7a3..4fb6f2cbf6 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -gfxip 10.3 -o - %s | FileCheck -check-prefix=SHADERTEST %s +// REQUIRES: do-not-run-me #version 450 @@ -36,9 +37,9 @@ void main() // SHADERTEST-NEXT: .entry: // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) -// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 3}} +// SHADERTEST-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 32 // SHADERTEST-NEXT: [[TMP3:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP2]], align 32 -// SHADERTEST-NEXT: [[TMP4:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 5}} +// SHADERTEST-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], 
i32 64 // SHADERTEST-NEXT: [[TMP5:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <3 x i64> [[TMP3]], i64 0 // SHADERTEST-NEXT: [[TMP7:%.*]] = extractelement <3 x i64> [[TMP5]], i64 0 @@ -53,97 +54,96 @@ void main() // SHADERTEST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] // SHADERTEST-NEXT: [[TMP17:%.*]] = insertelement <3 x i1> [[TMP13]], i1 [[TMP16]], i64 2 // SHADERTEST-NEXT: [[TMP18:%.*]] = zext <3 x i1> [[TMP17]] to <3 x i32> -// SHADERTEST-NEXT: [[DOT020_FR:%.*]] = freeze <3 x i32> [[TMP18]] -// SHADERTEST-NEXT: [[B3_0_VEC_EXTRACT7:%.*]] = extractelement <3 x i32> [[DOT020_FR]], i64 0 -// SHADERTEST-NEXT: [[TMP19:%.*]] = and i32 [[B3_0_VEC_EXTRACT7]], 1 -// SHADERTEST-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP19]], 0 -// SHADERTEST-NEXT: br i1 [[DOTNOT]], label [[TMP27:%.*]], label [[TMP20:%.*]] -// SHADERTEST: 20: -// SHADERTEST-NEXT: [[TMP21:%.*]] = icmp uge i64 [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP22:%.*]] = insertelement <3 x i1> poison, i1 [[TMP21]], i64 0 -// SHADERTEST-NEXT: [[TMP23:%.*]] = icmp uge i64 [[TMP10]], [[TMP11]] -// SHADERTEST-NEXT: [[TMP24:%.*]] = insertelement <3 x i1> [[TMP22]], i1 [[TMP23]], i64 1 -// SHADERTEST-NEXT: [[TMP25:%.*]] = icmp uge i64 [[TMP14]], [[TMP15]] -// SHADERTEST-NEXT: [[TMP26:%.*]] = insertelement <3 x i1> [[TMP24]], i1 [[TMP25]], i64 2 -// SHADERTEST-NEXT: br label [[TMP34:%.*]] -// SHADERTEST: 27: -// SHADERTEST-NEXT: [[TMP28:%.*]] = icmp ule i64 [[TMP6]], [[TMP7]] -// SHADERTEST-NEXT: [[TMP29:%.*]] = insertelement <3 x i1> poison, i1 [[TMP28]], i64 0 -// SHADERTEST-NEXT: [[TMP30:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// SHADERTEST-NEXT: [[TMP31:%.*]] = insertelement <3 x i1> [[TMP29]], i1 [[TMP30]], i64 1 -// SHADERTEST-NEXT: [[TMP32:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] -// SHADERTEST-NEXT: [[TMP33:%.*]] = insertelement <3 x i1> [[TMP31]], i1 [[TMP32]], i64 2 -// SHADERTEST-NEXT: br label [[TMP34]] -// SHADERTEST: 34: -// 
SHADERTEST-NEXT: [[DOT021_IN:%.*]] = phi <3 x i1> [ [[TMP26]], [[TMP20]] ], [ [[TMP33]], [[TMP27]] ] +// SHADERTEST-NEXT: [[TMP19:%.*]] = bitcast <3 x i32> [[TMP18]] to <96 x i1> +// SHADERTEST-NEXT: [[DOT020_FR:%.*]] = freeze <96 x i1> [[TMP19]] +// SHADERTEST-NEXT: [[TMP20:%.*]] = extractelement <96 x i1> [[DOT020_FR]], i64 0 +// SHADERTEST-NEXT: br i1 [[TMP20]], label [[TMP21:%.*]], label [[TMP28:%.*]] +// SHADERTEST: 21: +// SHADERTEST-NEXT: [[TMP22:%.*]] = icmp uge i64 [[TMP6]], [[TMP7]] +// SHADERTEST-NEXT: [[TMP23:%.*]] = insertelement <3 x i1> poison, i1 [[TMP22]], i64 0 +// SHADERTEST-NEXT: [[TMP24:%.*]] = icmp uge i64 [[TMP10]], [[TMP11]] +// SHADERTEST-NEXT: [[TMP25:%.*]] = insertelement <3 x i1> [[TMP23]], i1 [[TMP24]], i64 1 +// SHADERTEST-NEXT: [[TMP26:%.*]] = icmp uge i64 [[TMP14]], [[TMP15]] +// SHADERTEST-NEXT: [[TMP27:%.*]] = insertelement <3 x i1> [[TMP25]], i1 [[TMP26]], i64 2 +// SHADERTEST-NEXT: br label [[TMP35:%.*]] +// SHADERTEST: 28: +// SHADERTEST-NEXT: [[TMP29:%.*]] = icmp ule i64 [[TMP6]], [[TMP7]] +// SHADERTEST-NEXT: [[TMP30:%.*]] = insertelement <3 x i1> poison, i1 [[TMP29]], i64 0 +// SHADERTEST-NEXT: [[TMP31:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] +// SHADERTEST-NEXT: [[TMP32:%.*]] = insertelement <3 x i1> [[TMP30]], i1 [[TMP31]], i64 1 +// SHADERTEST-NEXT: [[TMP33:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// SHADERTEST-NEXT: [[TMP34:%.*]] = insertelement <3 x i1> [[TMP32]], i1 [[TMP33]], i64 2 +// SHADERTEST-NEXT: br label [[TMP35]] +// SHADERTEST: 35: +// SHADERTEST-NEXT: [[DOT021_IN:%.*]] = phi <3 x i1> [ [[TMP27]], [[TMP21]] ], [ [[TMP34]], [[TMP28]] ] // SHADERTEST-NEXT: [[DOT021_IN_FR:%.*]] = freeze <3 x i1> [[DOT021_IN]] -// SHADERTEST-NEXT: [[TMP35:%.*]] = extractelement <3 x i1> [[DOT021_IN_FR]], i64 0 -// SHADERTEST-NEXT: br i1 [[TMP35]], label [[TMP36:%.*]], label [[TMP51:%.*]] -// SHADERTEST: 36: -// SHADERTEST-NEXT: [[TMP37:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP2]], align 32 -// SHADERTEST-NEXT: 
[[TMP38:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 -// SHADERTEST-NEXT: [[TMP39:%.*]] = extractelement <3 x i64> [[TMP37]], i64 0 +// SHADERTEST-NEXT: [[TMP36:%.*]] = extractelement <3 x i1> [[DOT021_IN_FR]], i64 0 +// SHADERTEST-NEXT: br i1 [[TMP36]], label [[TMP37:%.*]], label [[TMP52:%.*]] +// SHADERTEST: 37: +// SHADERTEST-NEXT: [[TMP38:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP2]], align 32 +// SHADERTEST-NEXT: [[TMP39:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 // SHADERTEST-NEXT: [[TMP40:%.*]] = extractelement <3 x i64> [[TMP38]], i64 0 -// SHADERTEST-NEXT: [[TMP41:%.*]] = icmp ugt i64 [[TMP39]], [[TMP40]] -// SHADERTEST-NEXT: [[TMP42:%.*]] = insertelement <3 x i1> poison, i1 [[TMP41]], i64 0 -// SHADERTEST-NEXT: [[TMP43:%.*]] = extractelement <3 x i64> [[TMP37]], i64 1 +// SHADERTEST-NEXT: [[TMP41:%.*]] = extractelement <3 x i64> [[TMP39]], i64 0 +// SHADERTEST-NEXT: [[TMP42:%.*]] = icmp ugt i64 [[TMP40]], [[TMP41]] +// SHADERTEST-NEXT: [[TMP43:%.*]] = insertelement <3 x i1> poison, i1 [[TMP42]], i64 0 // SHADERTEST-NEXT: [[TMP44:%.*]] = extractelement <3 x i64> [[TMP38]], i64 1 -// SHADERTEST-NEXT: [[TMP45:%.*]] = icmp ugt i64 [[TMP43]], [[TMP44]] -// SHADERTEST-NEXT: [[TMP46:%.*]] = insertelement <3 x i1> [[TMP42]], i1 [[TMP45]], i64 1 -// SHADERTEST-NEXT: [[TMP47:%.*]] = extractelement <3 x i64> [[TMP37]], i64 2 +// SHADERTEST-NEXT: [[TMP45:%.*]] = extractelement <3 x i64> [[TMP39]], i64 1 +// SHADERTEST-NEXT: [[TMP46:%.*]] = icmp ugt i64 [[TMP44]], [[TMP45]] +// SHADERTEST-NEXT: [[TMP47:%.*]] = insertelement <3 x i1> [[TMP43]], i1 [[TMP46]], i64 1 // SHADERTEST-NEXT: [[TMP48:%.*]] = extractelement <3 x i64> [[TMP38]], i64 2 -// SHADERTEST-NEXT: [[TMP49:%.*]] = icmp ugt i64 [[TMP47]], [[TMP48]] -// SHADERTEST-NEXT: [[TMP50:%.*]] = insertelement <3 x i1> [[TMP46]], i1 [[TMP49]], i64 2 -// SHADERTEST-NEXT: br label [[TMP66:%.*]] -// SHADERTEST: 51: -// SHADERTEST-NEXT: [[TMP52:%.*]] = load <3 x i64>, ptr addrspace(7) 
[[TMP2]], align 32 -// SHADERTEST-NEXT: [[TMP53:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 -// SHADERTEST-NEXT: [[TMP54:%.*]] = extractelement <3 x i64> [[TMP52]], i64 0 +// SHADERTEST-NEXT: [[TMP49:%.*]] = extractelement <3 x i64> [[TMP39]], i64 2 +// SHADERTEST-NEXT: [[TMP50:%.*]] = icmp ugt i64 [[TMP48]], [[TMP49]] +// SHADERTEST-NEXT: [[TMP51:%.*]] = insertelement <3 x i1> [[TMP47]], i1 [[TMP50]], i64 2 +// SHADERTEST-NEXT: br label [[TMP67:%.*]] +// SHADERTEST: 52: +// SHADERTEST-NEXT: [[TMP53:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP2]], align 32 +// SHADERTEST-NEXT: [[TMP54:%.*]] = load <3 x i64>, ptr addrspace(7) [[TMP4]], align 32 // SHADERTEST-NEXT: [[TMP55:%.*]] = extractelement <3 x i64> [[TMP53]], i64 0 -// SHADERTEST-NEXT: [[TMP56:%.*]] = icmp ult i64 [[TMP54]], [[TMP55]] -// SHADERTEST-NEXT: [[TMP57:%.*]] = insertelement <3 x i1> poison, i1 [[TMP56]], i64 0 -// SHADERTEST-NEXT: [[TMP58:%.*]] = extractelement <3 x i64> [[TMP52]], i64 1 +// SHADERTEST-NEXT: [[TMP56:%.*]] = extractelement <3 x i64> [[TMP54]], i64 0 +// SHADERTEST-NEXT: [[TMP57:%.*]] = icmp ult i64 [[TMP55]], [[TMP56]] +// SHADERTEST-NEXT: [[TMP58:%.*]] = insertelement <3 x i1> poison, i1 [[TMP57]], i64 0 // SHADERTEST-NEXT: [[TMP59:%.*]] = extractelement <3 x i64> [[TMP53]], i64 1 -// SHADERTEST-NEXT: [[TMP60:%.*]] = icmp ult i64 [[TMP58]], [[TMP59]] -// SHADERTEST-NEXT: [[TMP61:%.*]] = insertelement <3 x i1> [[TMP57]], i1 [[TMP60]], i64 1 -// SHADERTEST-NEXT: [[TMP62:%.*]] = extractelement <3 x i64> [[TMP52]], i64 2 +// SHADERTEST-NEXT: [[TMP60:%.*]] = extractelement <3 x i64> [[TMP54]], i64 1 +// SHADERTEST-NEXT: [[TMP61:%.*]] = icmp ult i64 [[TMP59]], [[TMP60]] +// SHADERTEST-NEXT: [[TMP62:%.*]] = insertelement <3 x i1> [[TMP58]], i1 [[TMP61]], i64 1 // SHADERTEST-NEXT: [[TMP63:%.*]] = extractelement <3 x i64> [[TMP53]], i64 2 -// SHADERTEST-NEXT: [[TMP64:%.*]] = icmp ult i64 [[TMP62]], [[TMP63]] -// SHADERTEST-NEXT: [[TMP65:%.*]] = insertelement <3 x i1> 
[[TMP61]], i1 [[TMP64]], i64 2 -// SHADERTEST-NEXT: br label [[TMP66]] -// SHADERTEST: 66: -// SHADERTEST-NEXT: [[DOT022_IN:%.*]] = phi <3 x i1> [ [[TMP50]], [[TMP36]] ], [ [[TMP65]], [[TMP51]] ] -// SHADERTEST-NEXT: [[TMP67:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP68:%.*]] = getelementptr inbounds {{i8|<{ i64, i64, [[]16 x i8], [[]3 x i64], [[]8 x i8], [[]3 x i64] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} -// SHADERTEST-NEXT: [[TMP69:%.*]] = load i64, ptr addrspace(7) [[TMP68]], align 8 -// SHADERTEST-NEXT: [[TMP70:%.*]] = icmp ne i64 [[TMP67]], [[TMP69]] -// SHADERTEST-NEXT: [[COND_FREEZE4:%.*]] = freeze i1 [[TMP70]] -// SHADERTEST-NEXT: br i1 [[COND_FREEZE4]], label [[TMP71:%.*]], label [[TMP73:%.*]] -// SHADERTEST: 71: -// SHADERTEST-NEXT: [[TMP72:%.*]] = icmp sge i64 [[TMP67]], [[TMP69]] -// SHADERTEST-NEXT: br label [[TMP75:%.*]] -// SHADERTEST: 73: -// SHADERTEST-NEXT: [[TMP74:%.*]] = icmp sle i64 [[TMP67]], [[TMP69]] -// SHADERTEST-NEXT: br label [[TMP75]] -// SHADERTEST: 75: -// SHADERTEST-NEXT: [[DOT024_IN:%.*]] = phi i1 [ [[TMP72]], [[TMP71]] ], [ [[TMP74]], [[TMP73]] ] -// SHADERTEST-NEXT: [[COND_FREEZE5:%.*]] = freeze i1 [[DOT024_IN]] -// SHADERTEST-NEXT: br i1 [[COND_FREEZE5]], label [[TMP76:%.*]], label [[TMP80:%.*]] +// SHADERTEST-NEXT: [[TMP64:%.*]] = extractelement <3 x i64> [[TMP54]], i64 2 +// SHADERTEST-NEXT: [[TMP65:%.*]] = icmp ult i64 [[TMP63]], [[TMP64]] +// SHADERTEST-NEXT: [[TMP66:%.*]] = insertelement <3 x i1> [[TMP62]], i1 [[TMP65]], i64 2 +// SHADERTEST-NEXT: br label [[TMP67]] +// SHADERTEST: 67: +// SHADERTEST-NEXT: [[DOT022_IN:%.*]] = phi <3 x i1> [ [[TMP51]], [[TMP37]] ], [ [[TMP66]], [[TMP52]] ] +// SHADERTEST-NEXT: [[TMP68:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 +// SHADERTEST-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 8 +// SHADERTEST-NEXT: [[TMP70:%.*]] = load i64, ptr addrspace(7) [[TMP69]], align 8 +// SHADERTEST-NEXT: 
[[TMP71:%.*]] = icmp ne i64 [[TMP68]], [[TMP70]] +// SHADERTEST-NEXT: [[COND_FREEZE4:%.*]] = freeze i1 [[TMP71]] +// SHADERTEST-NEXT: br i1 [[COND_FREEZE4]], label [[TMP72:%.*]], label [[TMP74:%.*]] +// SHADERTEST: 72: +// SHADERTEST-NEXT: [[TMP73:%.*]] = icmp sge i64 [[TMP68]], [[TMP70]] +// SHADERTEST-NEXT: br label [[TMP76:%.*]] +// SHADERTEST: 74: +// SHADERTEST-NEXT: [[TMP75:%.*]] = icmp sle i64 [[TMP68]], [[TMP70]] +// SHADERTEST-NEXT: br label [[TMP76]] // SHADERTEST: 76: -// SHADERTEST-NEXT: [[TMP77:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP78:%.*]] = load i64, ptr addrspace(7) [[TMP68]], align 8 -// SHADERTEST-NEXT: [[TMP79:%.*]] = icmp sgt i64 [[TMP77]], [[TMP78]] -// SHADERTEST-NEXT: br label [[TMP84:%.*]] -// SHADERTEST: 80: -// SHADERTEST-NEXT: [[TMP81:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP82:%.*]] = load i64, ptr addrspace(7) [[TMP68]], align 8 -// SHADERTEST-NEXT: [[TMP83:%.*]] = icmp slt i64 [[TMP81]], [[TMP82]] -// SHADERTEST-NEXT: br label [[TMP84]] -// SHADERTEST: 84: -// SHADERTEST-NEXT: [[DOT0_IN:%.*]] = phi i1 [ [[TMP79]], [[TMP76]] ], [ [[TMP83]], [[TMP80]] ] -// SHADERTEST-NEXT: [[TMP85:%.*]] = extractelement <3 x i1> [[DOT022_IN]], i64 0 -// SHADERTEST-NEXT: [[TMP86:%.*]] = and i1 [[TMP85]], [[DOT0_IN]] -// SHADERTEST-NEXT: [[TMP87:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP86]], float 1.000000e+00, float 0.000000e+00 -// SHADERTEST-NEXT: call void (...) 
@lgc.create.write.generic.output(float [[TMP87]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) +// SHADERTEST-NEXT: [[DOT024_IN:%.*]] = phi i1 [ [[TMP73]], [[TMP72]] ], [ [[TMP75]], [[TMP74]] ] +// SHADERTEST-NEXT: [[COND_FREEZE5:%.*]] = freeze i1 [[DOT024_IN]] +// SHADERTEST-NEXT: br i1 [[COND_FREEZE5]], label [[TMP77:%.*]], label [[TMP81:%.*]] +// SHADERTEST: 77: +// SHADERTEST-NEXT: [[TMP78:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 +// SHADERTEST-NEXT: [[TMP79:%.*]] = load i64, ptr addrspace(7) [[TMP69]], align 8 +// SHADERTEST-NEXT: [[TMP80:%.*]] = icmp sgt i64 [[TMP78]], [[TMP79]] +// SHADERTEST-NEXT: br label [[TMP85:%.*]] +// SHADERTEST: 81: +// SHADERTEST-NEXT: [[TMP82:%.*]] = load i64, ptr addrspace(7) [[TMP0]], align 8 +// SHADERTEST-NEXT: [[TMP83:%.*]] = load i64, ptr addrspace(7) [[TMP69]], align 8 +// SHADERTEST-NEXT: [[TMP84:%.*]] = icmp slt i64 [[TMP82]], [[TMP83]] +// SHADERTEST-NEXT: br label [[TMP85]] +// SHADERTEST: 85: +// SHADERTEST-NEXT: [[DOT0_IN:%.*]] = phi i1 [ [[TMP80]], [[TMP77]] ], [ [[TMP84]], [[TMP81]] ] +// SHADERTEST-NEXT: [[TMP86:%.*]] = extractelement <3 x i1> [[DOT022_IN]], i64 0 +// SHADERTEST-NEXT: [[TMP87:%.*]] = and i1 [[TMP86]], [[DOT0_IN]] +// SHADERTEST-NEXT: [[TMP88:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP87]], float 1.000000e+00, float 0.000000e+00 +// SHADERTEST-NEXT: call void (...) @lgc.create.write.generic.output(float [[TMP88]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) // SHADERTEST-NEXT: ret void // diff --git a/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe b/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe index b7508c0bd1..522ceda4e1 100644 --- a/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe @@ -47,7 +47,7 @@ userDataNode[0].set = 0 userDataNode[0].binding = 0 ; CHECK-LABEL: @main( ; CHECK-NEXT: .entry: -; CHECK-NEXT: call void (...) 
@lgc.create.debug.break.V() +; CHECK-NEXT: call void @llvm.amdgcn.s.sethalt(i32 1) ; CHECK-NEXT: ret void ; ; diff --git a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe index 54612a6a4f..fb508148bb 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe @@ -4,7 +4,7 @@ ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: %{{.*}} = call {{.*}} {{.*}}@lgc.load.buffer.desc(i64 0, i32 1, i32 0, ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %{{.*}}, i32 0, i32 0, i32 0) +; SHADERTEST: call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %{{.*}}, i32 0, i32 0) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe index d4f5f53a9d..d3668cd458 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe @@ -10,7 +10,7 @@ ; SHADERTEST: call {{.*}} @lgc.create.image.load.with.fmask.v4f32(i32 6, i32 1536, {{.*}}, {{.*}}, <2 x i32> , i32 4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.image.load.2d.i32.i16(i32 1, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !9 +; SHADERTEST: call i32 @llvm.amdgcn.image.load.2d.i32.i16(i32 1, i16 0, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} ; SHADERTEST: %{{.*}} = call {{.*}} <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 0, i32 1, i32 %{{.*}}, <8 x i32> %{{.*}}, i32 0, i32 0) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe 
b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe new file mode 100644 index 0000000000..c0f7a2fdba --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe @@ -0,0 +1,107 @@ +; This test is to check output packing of mesh shader in LDS space. If we treat each location +; of a mesh shader output as vec4, the LDS usage will exceed HW limitation. But if we pack +; those outputs tightly in LDS space, the LDS usage will be reduced greatly. +; +; In this test, we have 25 vertex outputs and 2 primitive outputs. Further, the mesh shader +; uses a 2500-dword shared variable. All of these consume LDS space. If output packing in LDS space +; is not performed, the outputs will consume 4 * (25 + 2) * 128 = 13824 dwords. If packing +; is enabled, the LDS consumption will be reduced to (25 + 2) * 128 = 3456 dwords. The effect is +; noticeable. + +; BEGIN_SHADERTEST +; RUN: amdllpc -v -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: // LLPC mesh shader LDS region info (in dwords) and general info + +; SHADERTEST-LABEL: Per-vertex Output : offset = 0x0083, size = 0x0C80 +; SHADERTEST-LABEL: Per-primitive Output : offset = 0x0D03, size = 0x0100 + +; SHADERTEST-LABEL: Vertex Outputs Layout (stride = 25, exports = 25): +; SHADERTEST-LABEL: -- location = 0, components = 1, offset = 0 +; SHADERTEST-LABEL: -- location = 1, components = 1, offset = 1 +; SHADERTEST-LABEL: -- location = 2, components = 1, offset = 2 +; SHADERTEST-LABEL: -- location = 3, components = 1, offset = 3 +; SHADERTEST-LABEL: -- location = 4, components = 1, offset = 4 +; SHADERTEST-LABEL: -- location = 5, components = 1, offset = 5 +; SHADERTEST-LABEL: -- location = 6, components = 1, offset = 6 +; SHADERTEST-LABEL: -- location = 7, components = 1, offset = 7 +; SHADERTEST-LABEL: -- location = 8, components = 1, offset = 8 +; SHADERTEST-LABEL: -- location = 9, components = 1, offset = 9 +; SHADERTEST-LABEL: -- location = 10, components = 1, offset = 10 +; 
SHADERTEST-LABEL: -- location = 11, components = 1, offset = 11 +; SHADERTEST-LABEL: -- location = 12, components = 1, offset = 12 +; SHADERTEST-LABEL: -- location = 13, components = 1, offset = 13 +; SHADERTEST-LABEL: -- location = 14, components = 1, offset = 14 +; SHADERTEST-LABEL: -- location = 15, components = 1, offset = 15 +; SHADERTEST-LABEL: -- location = 16, components = 1, offset = 16 +; SHADERTEST-LABEL: -- location = 17, components = 1, offset = 17 +; SHADERTEST-LABEL: -- location = 18, components = 1, offset = 18 +; SHADERTEST-LABEL: -- location = 19, components = 1, offset = 19 +; SHADERTEST-LABEL: -- location = 20, components = 1, offset = 20 +; SHADERTEST-LABEL: -- location = 21, components = 1, offset = 21 +; SHADERTEST-LABEL: -- location = 22, components = 1, offset = 22 +; SHADERTEST-LABEL: -- location = 23, components = 1, offset = 23 +; SHADERTEST-LABEL: -- location = 24, components = 1, offset = 24 + +; SHADERTEST-LABEL: Primitive outputs layout (stride = 2, exports = 2): +; SHADERTEST-LABEL: -- location = 0, components = 1, offset = 0 +; SHADERTEST-LABEL: -- location = 1, components = 1, offset = 1 + +; SHADERTEST: AMDLLPC SUCCESS +; END_SHADERTEST + +[MeshGlsl] +#version 460 core + +#extension GL_EXT_mesh_shader: enable +#extension GL_EXT_shader_explicit_arithmetic_types: enable + +layout(local_size_x=128, local_size_y=1, local_size_z=1) in; +layout(points, max_vertices = 128, max_primitives = 128) out; + +layout(location = 0) out float vertex[][25]; + +layout(location = 25) out perprimitiveEXT float primitive[][2]; + +shared float sharedVar[2500]; + +void main() { + SetMeshOutputsEXT(128, 128); + + for (int i = 0; i < 25; i++) + vertex[gl_LocalInvocationIndex][i] = float(i / 25.0); + + primitive[gl_LocalInvocationIndex][0] = 0.0; + primitive[gl_LocalInvocationIndex][1] = 0.5; + + sharedVar[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); +} + +[MeshInfo] +entryPoint = main + +[FsGlsl] +#version 460 core + +#extension 
GL_EXT_mesh_shader: enable + +layout(location = 0) in float vertex[25]; +layout(location = 25) in perprimitiveEXT float primitive[2]; + +layout(location = 0) out vec4 outColor; + +void main() { + outColor = vec4(0.0); + + for (int i = 0; i < 25; i++) + outColor.x += vertex[i]; + + outColor.y += primitive[0]; + outColor.y += primitive[1]; +} + +[FsInfo] +entryPoint = main + +[GraphicsPipelineState] +patchControlPoints = 3 diff --git a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe index 2d5293e92c..ab1bb64817 100644 --- a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe @@ -3,10 +3,11 @@ ; SHADERTEST_PP0-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST_PP0: [[VERTEX_BASE:%[0-9a-zA-Z.]+]] = mul i32 %{{[0-9]*}}, 48 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 44 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 45 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 46 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 47 +; SHADERTEST_PP0: [[P0:%[0-9a-zA-Z.]+]] = getelementptr i32, ptr addrspace(3) {{.*}}, i32 [[VERTEX_BASE]] +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 1 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 4 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 5 @@ -14,21 +15,21 @@ ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 
[[VERTEX_BASE]], 9 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 10 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 12 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 16 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 20 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 24 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 28 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 29 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 30 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 31 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 32 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 33 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 36 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 37 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 38 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 39 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 40 -; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 41 +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(16|64)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(20|80)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(24|96)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(28|112)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} +; 
SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} ; SHADERTEST_PP0: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}15, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP0: call void @llvm.amdgcn.exp.f32(i32 {{.*}}33, i32 {{.*}}3, float %{{[^,]*}}, float %{{[^,]*}}, float poison, float poison, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 1, i32 %PrimMask) @@ -53,10 +54,11 @@ ; Pre-rasterization part-pipeline: ; SHADERTEST_PP1-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST_PP1: [[VERTEX_BASE:%[0-9a-zA-Z.]+]] = mul i32 %{{[0-9]*}}, 48 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 44 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 45 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 46 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 47 +; SHADERTEST_PP1: [[P0:%[0-9a-zA-Z.]+]] = getelementptr i32, ptr addrspace(3) {{.*}}, i32 [[VERTEX_BASE]] +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} +; 
SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 1 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 4 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 5 @@ -64,21 +66,21 @@ ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 9 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 10 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 12 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 16 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 20 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 24 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 28 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 29 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 30 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 31 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 32 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 33 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 36 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 37 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 38 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 39 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 40 -; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 41 +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(16|64)}} +; SHADERTEST_PP1: 
%{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(20|80)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(24|96)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(28|112)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} ; SHADERTEST_PP1: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}15, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP1: call void @llvm.amdgcn.exp.f32(i32 {{.*}}33, i32 {{.*}}3, float %{{[^,]*}}, float %{{[^,]*}}, float poison, float poison, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP1: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe new file mode 100644 index 
0000000000..75155ed03d --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe @@ -0,0 +1,100 @@ +; This test is to check transform feedback from TES while 32 outputs are fully used and they are scalarized to 128 +; components. Therefore, in this case, we have 128 transform feedback write calls. This test is to check if we can +; pack such transform feedback outputs correctly in LDS space on GFX11+. + +; BEGIN_SHADERTEST +; RUN: amdllpc -v -gfxip=11 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: LLPC geometry calculation factor results +; SHADERTEST: ES-GS ring item size (in dwords): 129 + +; SHADERTEST-LABEL: .fetchXfbOutput +; Write v4[31] = 4.0 -> LDS +; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr i32, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} +; SHADERTEST-NEXT: store i32 1082130432, ptr addrspace(3) [[ldsPtr1]], align 4 + +; Write v3[31] = 3.0 -> LDS +; SHADERTEST: [[ldsPtr2:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 128 +; SHADERTEST-NEXT: store i32 1077936128, ptr addrspace(3) [[ldsPtr2]], align 4 + +; Write v2[31] = 2.0 -> LDS +; SHADERTEST: [[ldsPtr3:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 256 +; SHADERTEST-NEXT: store i32 1073741824, ptr addrspace(3) [[ldsPtr3]], align 4 + +; Write v1[31] = 1.0 -> LDS +; SHADERTEST: [[ldsPtr4:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 384 +; SHADERTEST-NEXT: store i32 1065353216, ptr addrspace(3) [[ldsPtr4]], align 4 + +; SHADERTEST-LABEL: .exportXfbOutput +; Read v4[31] <- LDS +; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr i32, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} +; SHADERTEST-NEXT: [[v4:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr1]], align 4 +; SHADERTEST-NEXT: [[offset1:%[0-9]*]] = mul i32 %threadIdInSubgroup, 1536 +; SHADERTEST-NEXT: [[offset2:%[0-9]*]] = or disjoint i32 [[offset1]], 508 +; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.f32(float [[v4]], <4 x i32> 
%{{[0-9]*}}, i32 [[offset2]], i32 %{{[0-9]*}}, i32 22, i32 3) + +; Read v3[31] <- LDS +; SHADERTEST: [[ldsPtr2:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 128 +; SHADERTEST-NEXT: [[v3:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr2]], align 4 +; SHADERTEST-NEXT: [[offset3:%[0-9]*]] = mul i32 %threadIdInSubgroup, 1536 +; SHADERTEST-NEXT: [[offset4:%[0-9]*]] = or disjoint i32 [[offset3]], 380 +; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.f32(float [[v3]], <4 x i32> %{{[0-9]*}}, i32 [[offset4]], i32 %{{[0-9]*}}, i32 22, i32 3) + +; Read v2[31] <- LDS +; SHADERTEST: [[ldsPtr3:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 256 +; SHADERTEST-NEXT: [[v2:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr3]], align 4 +; SHADERTEST-NEXT: [[offset5:%[0-9]*]] = mul i32 %threadIdInSubgroup, 1536 +; SHADERTEST-NEXT: [[offset6:%[0-9]*]] = or disjoint i32 [[offset5]], 252 +; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.f32(float [[v2]], <4 x i32> %{{[0-9]*}}, i32 [[offset6]], i32 %{{[0-9]*}}, i32 22, i32 3) + +: Read v1[31] <- LDS +; SHADERTEST: [[ldsPtr4:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 384 +; SHADERTEST-NEXT: [[v1:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr4]], align 4 +; SHADERTEST-NEXT: [[offset7:%[0-9]*]] = mul i32 %threadIdInSubgroup, 1536 +; SHADERTEST-NEXT: [[offset8:%[0-9]*]] = or disjoint i32 [[offset7]], 124 +; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.f32(float [[v1]], <4 x i32> %{{[0-9]*}}, i32 [[offset8]], i32 %{{[0-9]*}}, i32 22, i32 3) + +; SHADERTEST: AMDLLPC SUCCESS +; END_SHADERTEST + +[TcsGlsl] +#version 450 core + +layout(vertices = 3) out; + +void main (void) { + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 1.0; + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + gl_TessLevelOuter[3] = 2.0; +} + +[TcsInfo] +entryPoint = main + +[TesGlsl] +#version 450 core + +layout(triangles) in; + 
+layout(location = 0, component = 0, xfb_buffer = 0, xfb_offset = 0) out float v1[32]; +layout(location = 0, component = 1, xfb_buffer = 0, xfb_offset = 128) out float v2[32]; +layout(location = 0, component = 2, xfb_buffer = 0, xfb_offset = 256) out float v3[32]; +layout(location = 0, component = 3, xfb_buffer = 0, xfb_offset = 384) out float v4[32]; + +void main() { + for (int i = 0; i < 32; ++i) { + v1[i] = 1.0; + v2[i] = 2.0; + v3[i] = 3.0; + v4[i] = 4.0; + } +} + +[TesInfo] +entryPoint = main + +[GraphicsPipelineState] +patchControlPoints = 32 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe index 5373eb6a71..aa08b8a473 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe @@ -77,7 +77,7 @@ attribute[1].binding = 0 attribute[1].format = VK_FORMAT_R32G32_SFLOAT attribute[1].offset = 16 ; SHADERTEST-LABEL: define {{[^@]+}}@lgc.shader.VS.main -; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[VERTEXBUFFERTABLE:%.*]], i32 inreg noundef [[BASEVERTEX:%.*]], i32 inreg noundef [[BASEINSTANCE:%.*]], i32 noundef [[VERTEXID:%.*]], i32 noundef [[RELVERTEXID:%.*]], i32 noundef [[PRIMITIVEID:%.*]], i32 noundef [[INSTANCEID:%.*]]) #[[ATTR0:[0-9]+]] !lgc.shaderstage !12 { +; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[VERTEXBUFFERTABLE:%.*]], i32 inreg noundef [[BASEVERTEX:%.*]], i32 inreg noundef [[BASEINSTANCE:%.*]], i32 noundef [[VERTEXID:%.*]], i32 noundef [[RELVERTEXID:%.*]], i32 noundef [[PRIMITIVEID:%.*]], i32 noundef [[INSTANCEID:%.*]]) #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !12 !lgc.shaderstage !13 { ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; SHADERTEST-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -86,7 +86,7 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) ; SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP4]], i64 0 -; SHADERTEST-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load !13 +; SHADERTEST-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load !14 ; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP6]], i32 [[VERTEXINDEX]], i32 16, i32 0, i32 22, i32 0) ; SHADERTEST-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0 ; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP6]], i32 [[VERTEXINDEX]], i32 20, i32 0, i32 22, i32 0) @@ -97,7 +97,7 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP13]], i32 1 ; SHADERTEST-NEXT: [[VERTEX1_0:%.*]] = bitcast <2 x i32> [[TMP14]] to <2 x float> ; SHADERTEST-NEXT: [[TMP15:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP4]], i64 0 -; SHADERTEST-NEXT: [[TMP16:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP15]], align 16, !invariant.load !13 +; SHADERTEST-NEXT: [[TMP16:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP15]], align 16, !invariant.load !14 ; SHADERTEST-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP16]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0) ; SHADERTEST-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 ; SHADERTEST-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP16]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0) @@ -137,7 +137,7 @@ attribute[1].offset = 16 ; ; ; SHADERTEST-LABEL: define {{[^@]+}}@lgc.shader.FS.main -; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[SAMPLEINFO:%.*]], i32 inreg noundef [[PRIMMASK:%.*]], <2 x float> 
noundef [[PERSPINTERPSAMPLE:%.*]], <2 x float> noundef [[PERSPINTERPCENTER:%.*]], <2 x float> noundef [[PERSPINTERPCENTROID:%.*]], <3 x float> noundef [[PERSPINTERPPULLMODE:%.*]], <2 x float> noundef [[LINEARINTERPSAMPLE:%.*]], <2 x float> noundef [[LINEARINTERPCENTER:%.*]], <2 x float> noundef [[LINEARINTERPCENTROID:%.*]], float noundef [[LINESTIPPLE:%.*]], float noundef [[FRAGCOORDX:%.*]], float noundef [[FRAGCOORDY:%.*]], float noundef [[FRAGCOORDZ:%.*]], float noundef [[FRAGCOORDW:%.*]], i32 noundef [[FRONTFACING:%.*]], i32 noundef [[ANCILLARY:%.*]], i32 noundef [[SAMPLECOVERAGE:%.*]], i32 noundef [[FIXEDXY:%.*]]) #[[ATTR1:[0-9]+]] !lgc.shaderstage !14 { +; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[SAMPLEINFO:%.*]], i32 inreg noundef [[PRIMMASK:%.*]], <2 x float> noundef [[PERSPINTERPSAMPLE:%.*]], <2 x float> noundef [[PERSPINTERPCENTER:%.*]], <2 x float> noundef [[PERSPINTERPCENTROID:%.*]], <3 x float> noundef [[PERSPINTERPPULLMODE:%.*]], <2 x float> noundef [[LINEARINTERPSAMPLE:%.*]], <2 x float> noundef [[LINEARINTERPCENTER:%.*]], <2 x float> noundef [[LINEARINTERPCENTROID:%.*]], float noundef [[LINESTIPPLE:%.*]], float noundef [[FRAGCOORDX:%.*]], float noundef [[FRAGCOORDY:%.*]], float noundef [[FRAGCOORDZ:%.*]], float noundef [[FRAGCOORDW:%.*]], i32 noundef [[FRONTFACING:%.*]], i32 noundef [[ANCILLARY:%.*]], i32 noundef [[SAMPLECOVERAGE:%.*]], i32 noundef [[FIXEDXY:%.*]]) #[[ATTR1:[0-9]+]] !spirv.ExecutionModel !15 !lgc.shaderstage !16 { ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[INTERPPERSPSAMPLE:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspSample.v2f32.i32(i32 268435456) #[[ATTR3:[0-9]+]] ; SHADERTEST-NEXT: [[TMP0:%.*]] = call float (...) 
@lgc.input.import.interpolated__f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPSAMPLE]]) diff --git a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp index 80abd41399..5c2c7ee99b 100644 --- a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp +++ b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp @@ -16,7 +16,7 @@ void main() test = gl_WorkGroupID.x; } // CHECK-LABEL: define {{[^@]+}}@_amdgpu_cs_main -// CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[WORKGROUPID1:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !lgc.shaderstage !5 { +// CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[WORKGROUPID1:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !{{.*}} !lgc.shaderstage !{{.*}} { // CHECK-NEXT: .entry: // CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() // CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe index d0acc1b541..c1c9d7d665 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe @@ -2,7 +2,7 @@ ; Test to check that dummy sgpr user data registers are inserted when required ; This test checks wave32 compute shaders -; RUN: amdllpc %gfxip -o - -filetype=asm %s | FileCheck -check-prefix=CHECK %s +; RUN: amdllpc -gfxip=11.0 -o - -filetype=asm %s | FileCheck -check-prefix=CHECK %s [Version] version = 54 @@ -140,8 +140,9 @@ options.threadGroupSwizzleMode = Default ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: s_load_b256 s[0:7], s[0:1], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: buffer_load_b64 
v[0:1], off, s[4:7], 0 -; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_buffer_load_b64 s[4:5], s[4:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 ; CHECK-NEXT: buffer_store_format_xy v[0:1], v2, s[0:3], 0 idxen ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe index 28cc405ca6..8c25a3d7c9 100644 --- a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe +++ b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe @@ -1,12 +1,12 @@ ; Test to check that the optimization of tessellation factors store are handled as expected - +; REQUIRES: do-not-run-me ; RUN: amdllpc %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: @_amdgpu_hs_main( ; SHADERTEST-LABEL: .distribHsPatchCount: ; SHADERTEST-NEXT: %[[HS_PATCH_COUNT_SHIFT:[^ ,]*]] = lshr i32 %mergeWaveInfo, 16 ; SHADERTEST-NEXT: %[[HS_PATCH_COUNT:[^ ,]*]] = and i32 %[[HS_PATCH_COUNT_SHIFT]], 255 -; SHADERTEST-NEXT: store i32 %[[HS_PATCH_COUNT]], ptr addrspace(3) getelementptr inbounds ([16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 640), align 4 +; SHADERTEST-NEXT: store i32 %[[HS_PATCH_COUNT]], ptr addrspace(3) getelementptr inbounds ([649 x i32], ptr addrspace(3) @Lds.HS, i32 0, i32 640), align 4 ; SHADERTEST-NEXT: br label %.endDistribHsPatchCount ; SHADERTEST-LABEL: .endDistribHsPatchCount: @@ -17,19 +17,19 @@ ; SHADERTEST-NEXT: br i1 %validHsVert, label %.beginHs, label %.endHs ; SHADERTEST-LABEL: .endHs: -; SHADERTEST: %[[HS_PATCH_COUNT:[^ ,]*]] = load i32, ptr addrspace(3) getelementptr inbounds ([16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 640), align 4 +; SHADERTEST: %[[HS_PATCH_COUNT:[^ ,]*]] = load i32, ptr addrspace(3) getelementptr inbounds ([649 x i32], ptr addrspace(3) @Lds.HS, i32 0, i32 640), align 4 ; SHADERTEST: %hsPatchCount = call i32 @llvm.amdgcn.readfirstlane(i32 
%[[HS_PATCH_COUNT]]) ; SHADERTEST: %validHsPatch = icmp ult i32 %threadIdInGroup, %hsPatchCount ; SHADERTEST: br i1 %validHsPatch, label %.checkSpecialTfInWave, label %.endCheckSpecialTfInWave ; SHADERTEST-LABEL: .checkSpecialTfInWave: ; SHADERTEST-NEXT: %[[OUTER_TF_OFFSET_0:[^ ,]*]] = mul i32 %threadIdInGroup, 6 -; SHADERTEST-NEXT: %[[OUTER_TF_OFFSET_1:[^ ,]*]] = add i32 %[[OUTER_TF_OFFSET_0]], 256 -; SHADERTEST-NEXT: %[[OUTER_TF_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[OUTER_TF_OFFSET_1]] +; SHADERTEST-NEXT: %[[OUTER_TF_I_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[OUTER_TF_OFFSET_0]] +; SHADERTEST-NEXT: %[[OUTER_TF_PTR:[^ ,]*]] = getelementptr {{(i8|i32)}}, ptr addrspace(3) %[[OUTER_TF_I_PTR]], i32 {{(256|1024)}} ; SHADERTEST-NEXT: %[[OUTER_TF:[^ ,]*]] = load <4 x float>, ptr addrspace(3) %[[OUTER_TF_PTR]], align 4 ; SHADERTEST-NEXT: %[[INNER_TF_OFFSET_0:[^ ,]*]] = mul i32 %threadIdInGroup, 6 -; SHADERTEST-NEXT: %[[INNER_TF_OFFSET_1:[^ ,]*]] = add i32 %[[INNER_TF_OFFSET_0]], 260 -; SHADERTEST-NEXT: %[[INNER_TF_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[INNER_TF_OFFSET_1]] +; SHADERTEST-NEXT: %[[INNER_TF_I_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[INNER_TF_OFFSET_0]] +; SHADERTEST-NEXT: %[[INNER_TF_PTR:[^ ,]*]] = getelementptr {{(i8|i32)}}, ptr addrspace(3) %[[INNER_TF_I_PTR]], i32 {{(260|1040)}} ; SHADERTEST-NEXT: %[[INNER_TF:[^ ,]*]] = load <2 x float>, ptr addrspace(3) %[[INNER_TF_PTR]], align 4 ; SHADERTEST-NEXT: %[[OUTER_TF_0:[^ ,]*]] = extractelement <4 x float> %[[OUTER_TF]], i64 0 ; SHADERTEST-NEXT: %[[IS_ONE_0:[^ ,]*]] = fcmp oeq float %[[OUTER_TF_0]], 1.000000e+00 @@ -80,11 +80,11 @@ ; SHADERTEST-NEXT: %[[WAVE_ID_OFFSET:[^ ,]*]] = shl nuw nsw i32 %waveIdInGroup, 1 ; SHADERTEST-NEXT: %[[ALL_ONES_OFFSET:[^ ,]*]] = or {{.*}}i32 %[[WAVE_ID_OFFSET]], 641 ; SHADERTEST-NEXT: %[[IS_ALL_ONES_TF:[^ ,]*]] = zext i1 %isAllOnesTfInWave to i32 
-; SHADERTEST-NEXT: %[[ALL_ONES_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[ALL_ONES_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ONES_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[ALL_ONES_OFFSET]] ; SHADERTEST-NEXT: store i32 %[[IS_ALL_ONES_TF]], ptr addrspace(3) %[[ALL_ONES_PTR]], align 4 -; SHADERTEST-NEXT: %[[ALL_ZEROS_OFFSET:[^ ,]*]] = add nuw nsw i32 %[[WAVE_ID_OFFSET]], 642 ; SHADERTEST-NEXT: %[[IS_ALL_ZEROS_TF:[^ ,]*]] = zext i1 %isAllZerosTfInWave to i32 -; SHADERTEST-NEXT: %[[ALL_ZEROS_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[ALL_ZEROS_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ZEROS_I_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[WAVE_ID_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ZEROS_PTR:[^ ,]*]] = getelementptr {{(i8|i32)}}, ptr addrspace(3) %[[ALL_ZEROS_I_PTR]], i32 {{(642|2568)}} ; SHADERTEST-NEXT: store i32 %[[IS_ALL_ZEROS_TF]], ptr addrspace(3) %[[ALL_ZEROS_PTR]], align 4 ; SHADERTEST-NEXT: fence syncscope("workgroup") release ; SHADERTEST-NEXT: call void @llvm.amdgcn.s.barrier() @@ -94,20 +94,18 @@ ; SHADERTEST-LABEL: .checkSpecialTfInGroup: ; SHADERTEST-NEXT: %[[THREAD_ID_OFFSET:[^ ,]*]] = shl i32 %threadIdInWave, 1 -; SHADERTEST-NEXT: %[[ALL_ONES_OFFSET:[^ ,]*]] = add i32 %[[THREAD_ID_OFFSET]], 641 -; SHADERTEST-NEXT: %[[ALL_ONES_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[ALL_ONES_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ONES_I_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[THREAD_ID_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ONES_PTR:[^ ,]*]] = getelementptr {{(i8|i32)}}, ptr addrspace(3) %[[ALL_ONES_I_PTR]], i32 {{(641|2564)}} ; SHADERTEST-NEXT: %[[IS_ALL_ONES_TF:[^ ,]*]] = load i32, ptr addrspace(3) %[[ALL_ONES_PTR]], align 4 -; SHADERTEST-NEXT: %[[ALL_ONES_VALUE:[^ ,]*]] = and i32 %[[IS_ALL_ONES_TF]], 1 -; SHADERTEST-NEXT: %[[IS_ALL_ONES:[^ ,]*]] = icmp ne i32 %[[ALL_ONES_VALUE]], 0 -; 
SHADERTEST-NEXT: %[[ALL_ZEROS_OFFSET:[^ ,]*]] = add i32 %[[THREAD_ID_OFFSET]], 642 -; SHADERTEST-NEXT: %[[ALL_ZEROS_PTR:[^ ,]*]] = getelementptr [16384 x i32], ptr addrspace(3) @Lds, i32 0, i32 %[[ALL_ZEROS_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ONES_VALUE:[^ ,]*]] = trunc i32 %[[IS_ALL_ONES_TF]] to i1 +; SHADERTEST-NEXT: %[[ALL_ZEROS_I_PTR:[^ ,]*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 %[[THREAD_ID_OFFSET]] +; SHADERTEST-NEXT: %[[ALL_ZEROS_PTR:[^ ,]*]] = getelementptr {{(i8|i32)}}, ptr addrspace(3) %[[ALL_ZEROS_I_PTR]], i32 {{(642|2568)}} ; SHADERTEST-NEXT: %[[IS_ALL_ZEROS_TF:[^ ,]*]] = load i32, ptr addrspace(3) %[[ALL_ZEROS_PTR]], align 4 -; SHADERTEST-NEXT: %[[ALL_ZERO_VALUE:[^ ,]*]] = and i32 %[[IS_ALL_ZEROS_TF]], 1 -; SHADERTEST-NEXT: %[[IS_ALL_ZEROS:[^ ,]*]] = icmp ne i32 %[[ALL_ZERO_VALUE]], 0 +; SHADERTEST-NEXT: %[[ALL_ZERO_VALUE:[^ ,]*]] = trunc i32 %[[IS_ALL_ZEROS_TF]] to i1 ; SHADERTEST-NEXT: %[[BALLOT_MASK:[^ ,]*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) -; SHADERTEST-NEXT: %[[ALL_ONES_MASK:[^ ,]*]] = call i64 @llvm.amdgcn.ballot.i64(i1 %[[IS_ALL_ONES]]) +; SHADERTEST-NEXT: %[[ALL_ONES_MASK:[^ ,]*]] = call i64 @llvm.amdgcn.ballot.i64(i1 %[[ALL_ONES_VALUE]]) ; SHADERTEST-NEXT: %[[ALL_ONES_IN_GROUP:[^ ,]*]] = icmp eq i64 %[[ALL_ONES_MASK]], %[[BALLOT_MASK]] -; SHADERTEST-NEXT: %[[ALL_ZEROS_MASK:[^ ,]*]] = call i64 @llvm.amdgcn.ballot.i64(i1 %[[IS_ALL_ZEROS]]) +; SHADERTEST-NEXT: %[[ALL_ZEROS_MASK:[^ ,]*]] = call i64 @llvm.amdgcn.ballot.i64(i1 %[[ALL_ZERO_VALUE]]) ; SHADERTEST-NEXT: %[[ALL_ZEROS_IN_GROUP:[^ ,]*]] = icmp eq i64 %[[ALL_ZEROS_MASK]], %[[BALLOT_MASK]] ; SHADERTEST-NEXT: br label %.endHandleMultiWave diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe new file mode 100644 index 0000000000..b27952230d --- /dev/null +++ b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe @@ -0,0 +1,25 @@ +; NOTE: 
Assertions have been autogenerated by tool/update_llpc_test_checks.py +; Test color export shader +; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s +[Version] +version = 70 + +[GraphicsPipelineState] +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 14 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 0 +unlinked = 1 +enableColorExportShader = 1 + + +[FsOutput] +data=0, 1, 0, 0, 0, 862336118, 50, + + +; CHECK-LABEL: color_export_shader: +; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 +; CHECK-NEXT: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 +; CHECK-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm +; CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe new file mode 100644 index 0000000000..192a811de4 --- /dev/null +++ b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s +[Version] +version = 70 + +[FsGlsl] +#version 450 +layout(location = 0) out highp vec4 o_color; +layout(set = 1, binding = 1) uniform buf +{ + vec4 colorTop; + vec4 colorBot; +}; + +void main() +{ + const int middle = 8; + o_color = int(gl_FragCoord.y - 0.5f) < middle ? 
colorTop : colorBot; +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 2 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = StreamOutTableVaPtr +userDataNode[1].offsetInDwords = 3 +userDataNode[1].sizeInDwords = 1 +userDataNode[2].visibility = 64 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 7 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorConstBuffer +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 4 +userDataNode[2].next[0].set = 0x00000001 +userDataNode[2].next[0].binding = 1 +userDataNode[2].next[0].strideInDwords = 0 + +[GraphicsPipelineState] +enableMultiView = 0 +unlinked = 1 +enableColorExportShader = 1 +; CHECK-LABEL: amdgpu_ps_main: +; CHECK: s_getpc_b64 s[8:9] +; CHECK-NEXT: s_mov_b32 s8, s0 +; CHECK-NEXT: v_add_f32_e32 v0, -0.5, v3 +; CHECK-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 +; CHECK-NEXT: s_mov_b32 s0, s1 +; CHECK-NEXT: s_mov_b32 s6, s2 +; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: v_cvt_i32_f32_e32 v0, v0 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, 7, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_add_u32 s8, s8, s4 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_mov_b32 s1, s5 +; CHECK-NEXT: s_mov_b32 s7, s5 +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen +; CHECK-NEXT: s_waitcnt_depctr 0xffe3 +; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] +; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7] diff --git 
a/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe new file mode 100644 index 0000000000..bfcdfd0082 --- /dev/null +++ b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s +[Version] +version = 70 + +[VsGlsl] +#version 450 +layout(location = 0) in vec4 in_position; +layout(set = 0, binding = 0) uniform buf +{ + vec4 z_coord; +}; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + const float z = gl_VertexIndex < 3 ? z_coord.x : z_coord.y; + gl_Position = vec4(in_position.x, in_position.y, z, 1.0f); +} + +[VsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 2 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = InlineBuffer +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 1032 +userDataNode[1].next[0].set = 0xFFFFFFFF +userDataNode[1].next[0].binding = 5 +userDataNode[1].next[0].strideInDwords = 0 +userDataNode[2].visibility = 4 +userDataNode[2].type = StreamOutTableVaPtr +userDataNode[2].offsetInDwords = 3 +userDataNode[2].sizeInDwords = 1 +userDataNode[3].visibility = 2 +userDataNode[3].type = DescriptorTableVaPtr +userDataNode[3].offsetInDwords = 5 +userDataNode[3].sizeInDwords = 1 +userDataNode[3].next[0].type = DescriptorConstBuffer +userDataNode[3].next[0].offsetInDwords = 0 +userDataNode[3].next[0].sizeInDwords = 4 +userDataNode[3].next[0].set = 0x00000000 +userDataNode[3].next[0].binding = 0 
+userDataNode[3].next[0].strideInDwords = 0 + +[GraphicsPipelineState] +unlinked = 1 +; CHECK-LABEL: amdgpu_vs_main: +; CHECK: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_mov_b32 s0, s1 +; CHECK-NEXT: s_mov_b32 s1, s5 +; CHECK-NEXT: v_add_nc_u32_e32 v0, s2, v0 +; CHECK-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v1, 1.0 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 2, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp pos0 v0, v0, v0, v1 done +; CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe new file mode 100644 index 0000000000..685133b765 --- /dev/null +++ b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; RUN: amdllpc -v %s | FileCheck -check-prefix=SHADERTEST %s + +[Version] +version = 70 + +[GraphicsLibrary] +preRaster=PipelineLibVs_TestVsLibrary.pipe +fragment=PipelineLibFs_TestFsLibrary.pipe +colorExport=PipelineLibCes_TestColorExport.pipe + + +; SHADERTEST-LABEL: @lgc.shader.VS.main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) +; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +; SHADERTEST-NEXT: [[TMP2:%.*]] = call <4 x float> @lgc.input.import.generic__v4f32(i1 false, i32 0, i32 0, i32 0, i32 poison) +; SHADERTEST-NEXT: [[TMP3:%.*]] = call i32 @lgc.special.user.data.BaseVertex(i32 268435459) #[[ATTR3:[0-9]+]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = call i32 @lgc.shader.input.VertexId(i32 17) #[[ATTR3]] +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[TMP3]], 
[[TMP4]] +; SHADERTEST-NEXT: [[DOTFR:%.*]] = freeze i32 [[VERTEXINDEX]] +; SHADERTEST-NEXT: [[TMP5:%.*]] = icmp slt i32 [[DOTFR]], 3 +; SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 4 +; SHADERTEST-NEXT: [[DOT0_IN:%.*]] = select i1 [[TMP5]], ptr addrspace(7) [[TMP0]], ptr addrspace(7) [[TMP6]] +; SHADERTEST-NEXT: [[DOT0:%.*]] = load float, ptr addrspace(7) [[DOT0_IN]], align 4 +; SHADERTEST-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP2]], float 1.000000e+00, i64 3 +; SHADERTEST-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[DOT0]], i64 2 +; SHADERTEST-NEXT: call void @lgc.output.export.builtin.Position.i32.v4f32(i32 0, <4 x float> [[TMP8]]) #[[ATTR4:[0-9]+]] +; SHADERTEST-NEXT: ret void +; +; +; +; SHADERTEST-LABEL: @lgc.shader.FS.main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 1, i32 1, i32 0, i32 0) +; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +; SHADERTEST-NEXT: [[FRAGCOORD:%.*]] = call <4 x float> @lgc.input.import.builtin.FragCoord.v4f32.i32(i32 15) #[[ATTR3]] +; SHADERTEST-NEXT: [[__LLPC_INPUT_PROXY_GL_FRAGCOORD_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[FRAGCOORD]], i64 1 +; SHADERTEST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[__LLPC_INPUT_PROXY_GL_FRAGCOORD_4_VEC_EXTRACT]], -5.000000e-01 +; SHADERTEST-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i32 +; SHADERTEST-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP3]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = icmp slt i32 [[DOTFR]], 8 +; SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 16 +; SHADERTEST-NEXT: [[DOT0_IN:%.*]] = select i1 [[TMP4]], ptr addrspace(7) [[TMP0]], ptr addrspace(7) [[TMP5]] +; SHADERTEST-NEXT: [[DOT0:%.*]] = load <4 x float>, ptr addrspace(7) [[DOT0_IN]], align 16 +; SHADERTEST-NEXT: call void 
@lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[DOT0]]) #[[ATTR4]] +; SHADERTEST-NEXT: ret void +; +; +; +; SHADERTEST-LABEL: @color_export_shader( +; SHADERTEST-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0:%.*]], i64 0 +; SHADERTEST-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 +; SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 +; SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 +; SHADERTEST-NEXT: [[TMP7:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[TMP3]], float [[TMP4]]) +; SHADERTEST-NEXT: [[TMP8:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[TMP5]], float [[TMP6]]) +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> [[TMP7]], <2 x half> [[TMP8]], i1 immarg true, i1 immarg true) #[[ATTR2:[0-9]+]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.endpgm() +; SHADERTEST-NEXT: unreachable +; diff --git a/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen new file mode 100644 index 0000000000..9ab6e81397 --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen @@ -0,0 +1,13 @@ +// RUN: amdllpc %gfxip --print-after=llpc-spirv-lower-gpurt-library 2>&1 %s | FileCheck -check-prefix=CHECK %s +// Disable this test for now as continuations part of GPURT is not opensourced yet. +// REQUIRES: do-not-run-me +#version 460 +#extension GL_EXT_ray_tracing : enable + +void main() +{ +} +// Check these _Amd intrinsics's bodies are deleted. 
+// CHECK: declare dso_local spir_func i32 @_AmdGetShaderKind() +// CHECK: declare dso_local spir_func i64 @_AmdGetResumePointAddr() +// CHECK: declare dso_local spir_func {{.*}} @_AmdAwait{{.*}}( diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe index 729dace170..76845b01dc 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe @@ -81,14 +81,14 @@ attribute[0].offset = 0 ; SHADERTEST-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP16]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]]) ; SHADERTEST-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]]) ; SHADERTEST-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP18]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]]) -; SHADERTEST-NEXT: [[DOTI0:%.*]] = fptosi float [[TMP17]] to i32 -; SHADERTEST-NEXT: [[DOTI1:%.*]] = fptosi float [[TMP19]] to i32 ; SHADERTEST-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], -4294967296 ; SHADERTEST-NEXT: [[TMP13:%.*]] = zext i32 [[DESCTABLE0:%.*]] to i64 ; SHADERTEST-NEXT: [[TMP14:%.*]] = or {{(disjoint )?}}i64 [[TMP12]], [[TMP13]] ; SHADERTEST-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr addrspace(4) -; SHADERTEST-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load !10 -; SHADERTEST-NEXT: [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP15]], align 16, !invariant.load !10 +; SHADERTEST-NEXT: [[DOTI0:%.*]] = fptosi float [[TMP17]] to i32 +; SHADERTEST-NEXT: [[DOTI1:%.*]] = fptosi float [[TMP19]] to i32 +; SHADERTEST-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load !{{.*}} +; SHADERTEST-NEXT: 
[[TMP21:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP15]], align 16, !invariant.load !{{.*}} ; SHADERTEST-NEXT: [[DOTI01:%.*]] = sitofp i32 [[DOTI0]] to float ; SHADERTEST-NEXT: [[DOTI12:%.*]] = sitofp i32 [[DOTI1]] to float ; SHADERTEST-NEXT: [[TMP23:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[DOTI01]], float [[DOTI12]], <8 x i32> [[TMP20]], <4 x i32> [[TMP21]], i1 false, i32 0, i32 0) diff --git a/llpc/tool/amdllpc.cpp b/llpc/tool/amdllpc.cpp index ee5a2ddee5..61211f861b 100644 --- a/llpc/tool/amdllpc.cpp +++ b/llpc/tool/amdllpc.cpp @@ -590,25 +590,23 @@ static void initCompileInfo(CompileInfo *compileInfo) { ForceNonUniformResourceIndexStageMask; // Set NGG control settings - if (ParsedGfxIp.major >= 10) { - auto &nggState = compileInfo->gfxPipelineInfo.nggState; - - nggState.enableNgg = EnableNgg; - nggState.enableGsUse = NggEnableGsUse; - nggState.forceCullingMode = NggForceCullingMode; - nggState.compactVertex = NggCompactVertex; - nggState.enableBackfaceCulling = NggEnableBackfaceCulling; - nggState.enableFrustumCulling = NggEnableFrustumCulling; - nggState.enableBoxFilterCulling = NggEnableBoxFilterCulling; - nggState.enableSphereCulling = NggEnableSphereCulling; - nggState.enableSmallPrimFilter = NggEnableSmallPrimFilter; - nggState.enableCullDistanceCulling = NggEnableCullDistanceCulling; - - nggState.backfaceExponent = NggBackfaceExponent; - nggState.subgroupSizing = static_cast(NggSubgroupSizing.getValue()); - nggState.primsPerSubgroup = NggPrimsPerSubgroup; - nggState.vertsPerSubgroup = NggVertsPerSubgroup; - } + auto &nggState = compileInfo->gfxPipelineInfo.nggState; + + nggState.enableNgg = EnableNgg; + nggState.enableGsUse = NggEnableGsUse; + nggState.forceCullingMode = NggForceCullingMode; + nggState.compactVertex = NggCompactVertex; + nggState.enableBackfaceCulling = NggEnableBackfaceCulling; + nggState.enableFrustumCulling = NggEnableFrustumCulling; + 
nggState.enableBoxFilterCulling = NggEnableBoxFilterCulling; + nggState.enableSphereCulling = NggEnableSphereCulling; + nggState.enableSmallPrimFilter = NggEnableSmallPrimFilter; + nggState.enableCullDistanceCulling = NggEnableCullDistanceCulling; + + nggState.backfaceExponent = NggBackfaceExponent; + nggState.subgroupSizing = static_cast(NggSubgroupSizing.getValue()); + nggState.primsPerSubgroup = NggPrimsPerSubgroup; + nggState.vertsPerSubgroup = NggVertsPerSubgroup; compileInfo->internalRtShaders = EnableInternalRtShaders; } @@ -666,10 +664,12 @@ static Error fixupRtState(RtState &rtState, std::vector &shaderLibraryStor // // @param compiler : LLPC compiler // @param inFiles : Input filename(s) +// @param isGraphicsLibrary : Whether compiled pipeline is library // @returns : `ErrorSuccess` on success, `ResultError` on failure -static Error processInputs(ICompiler *compiler, InputSpecGroup &inputSpecs) { +static Error processInputs(ICompiler *compiler, InputSpecGroup &inputSpecs, bool isGraphicsLibrary) { assert(!inputSpecs.empty()); CompileInfo compileInfo = {}; + compileInfo.isGraphicsLibrary = isGraphicsLibrary; compileInfo.unlinked = true; compileInfo.doAutoLayout = true; std::vector standaloneRtShaders; @@ -684,6 +684,20 @@ static Error processInputs(ICompiler *compiler, InputSpecGroup &inputSpecs) { if (Error err = processInputPipeline(compiler, compileInfo, firstInput, Unlinked, IgnoreColorAttachmentFormats)) return err; + if (compileInfo.pipelineType == VfxPipelineTypeGraphicsLibrary) { + // All input shaders form one group. 
+ SmallVector groups; + append_range(groups, + map_range(compileInfo.inputSpecs, [](const InputSpec &spec) { return InputSpecGroup{spec}; })); + + if (Error err = parallelFor(NumThreads, groups, [compiler](InputSpecGroup &inputGroup) { + return processInputs(compiler, inputGroup, true); + })) { + return err; + } + return Error::success(); + } + if (isRayTracingPipeline(compileInfo.stageMask)) { if (LlpcRaytracingModeSetting.getNumOccurrences()) compileInfo.rayTracePipelineInfo.mode = LlpcRaytracingModeSetting; @@ -889,8 +903,9 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - if (Error err = parallelFor(NumThreads, *inputGroupsOrErr, - [compiler](InputSpecGroup &inputGroup) { return processInputs(compiler, inputGroup); })) { + if (Error err = parallelFor(NumThreads, *inputGroupsOrErr, [compiler](InputSpecGroup &inputGroup) { + return processInputs(compiler, inputGroup, false); + })) { result = reportError(std::move(err)); return EXIT_FAILURE; } diff --git a/llpc/tool/llpcCompilationUtils.cpp b/llpc/tool/llpcCompilationUtils.cpp index 83386ca47a..01a62c99ce 100644 --- a/llpc/tool/llpcCompilationUtils.cpp +++ b/llpc/tool/llpcCompilationUtils.cpp @@ -395,6 +395,25 @@ Error processInputPipeline(ICompiler *compiler, CompileInfo &compileInfo, const compileInfo.rayTracePipelineInfo = pipelineState->rayPipelineInfo; compileInfo.pipelineType = pipelineState->pipelineType; + if (pipelineState->pipelineType == VfxPipelineTypeGraphicsLibrary) { + LLPC_OUTS("// Pipeline type is Graphics library, compile each stage library:\n"); + for (auto &libFileName : pipelineState->graphicsLibFileName) { + if (!libFileName.empty()) { + LLPC_OUTS(libFileName + "\n"); + auto inputSpecOrErr = parseInputFileSpec(libFileName); + assert(!inputSpecOrErr.takeError()); + compileInfo.inputSpecs.push_back(std::move(*inputSpecOrErr)); + } + } + return Error::success(); + } + + if (!pipelineState->fsOutputs.empty()) { + // Color export shader + compileInfo.fsOutputs = 
pipelineState->fsOutputs; + compileInfo.isGraphicsLibrary = true; + } + if (ignoreColorAttachmentFormats) { // NOTE: When this option is enabled, we set color attachment format to // R8G8B8A8_SRGB for color target 0. Also, for other color targets, if the diff --git a/llpc/tool/llpcCompilationUtils.h b/llpc/tool/llpcCompilationUtils.h index fa442f521b..e6afe5c463 100644 --- a/llpc/tool/llpcCompilationUtils.h +++ b/llpc/tool/llpcCompilationUtils.h @@ -84,6 +84,8 @@ struct CompileInfo { Llpc::GfxIpVersion gfxIp; // Graphics IP version info llvm::SmallVector inputSpecs; // Input shader specification VkFlags stageMask; // Shader stage mask + bool isGraphicsLibrary; // Whether it is graphics library + std::vector fsOutputs; // Fragment outputs llvm::SmallVector shaderModuleDatas; // ShaderModule Data Llpc::GraphicsPipelineBuildInfo gfxPipelineInfo; // Info to build graphics pipeline Llpc::GraphicsPipelineBuildOut gfxPipelineOut; // Output of building graphics pipeline diff --git a/llpc/tool/llpcGraphicsPipelineBuilder.cpp b/llpc/tool/llpcGraphicsPipelineBuilder.cpp index 23b98f27f0..90bf049ec4 100644 --- a/llpc/tool/llpcGraphicsPipelineBuilder.cpp +++ b/llpc/tool/llpcGraphicsPipelineBuilder.cpp @@ -164,6 +164,26 @@ Expected GraphicsPipelineBuilder::buildGraphicsPipeline() { void *pipelineDumpHandle = runPreBuildActions(localPipelineInfo); auto onExit = make_scope_exit([&] { runPostBuildActions(pipelineDumpHandle, {pipelineOut->pipelineBin}); }); + if (compileInfo.isGraphicsLibrary) { + Result result = Result::Success; + if (compileInfo.stageMask == 0) { + result = getCompiler().BuildColorExportShader(pipelineInfo, compileInfo.fsOutputs.data(), pipelineOut, + pipelineDumpHandle); + + } else if (compileInfo.stageMask & Vkgc::ShaderStageBit::ShaderStageFragmentBit) { + result = + getCompiler().buildGraphicsShaderStage(pipelineInfo, pipelineOut, UnlinkedStageFragment, pipelineDumpHandle); + } else { + result = getCompiler().buildGraphicsShaderStage(pipelineInfo, pipelineOut, 
UnlinkedStageVertexProcess, + pipelineDumpHandle); + } + + if (result != Result::Success) + return createResultError(result, "Graphics pipeline compilation failed"); + + return pipelineOut->pipelineBin; + } + if (pipelineInfo->enableColorExportShader) { Result result = getCompiler().buildGraphicsShaderStage(pipelineInfo, pipelineOut, UnlinkedStageVertexProcess, pipelineDumpHandle); @@ -173,7 +193,7 @@ Expected GraphicsPipelineBuilder::buildGraphicsPipeline() { result = getCompiler().buildGraphicsShaderStage(pipelineInfo, pipelineOut, UnlinkedStageFragment, pipelineDumpHandle); } - if (result == Result::Success) { + if (result == Result::Success && pipelineOut->fsOutputMetaData != nullptr) { void *fsOuts = compileInfo.pipelineBuf; compileInfo.pipelineBuf = nullptr; result = getCompiler().BuildColorExportShader(pipelineInfo, pipelineOut->fsOutputMetaData, pipelineOut, diff --git a/llpc/tool/llpcPipelineBuilder.cpp b/llpc/tool/llpcPipelineBuilder.cpp index f98a8f0c8f..5fed6df3c5 100644 --- a/llpc/tool/llpcPipelineBuilder.cpp +++ b/llpc/tool/llpcPipelineBuilder.cpp @@ -100,6 +100,12 @@ std::unique_ptr createPipelineBuilder(ICompiler &compiler, Comp if (isRayTracingPipeline(stageMask)) return std::make_unique(compiler, compileInfo, dumpOptions, printPipelineInfo); + if (stageMask == 0) { + // This may be a color export shader. 
+ if (compileInfo.gfxPipelineInfo.enableColorExportShader && !compileInfo.fsOutputs.empty()) + return std::make_unique(compiler, compileInfo, dumpOptions, printPipelineInfo); + } + llvm_unreachable("Unknown pipeline kind"); return nullptr; } diff --git a/llpc/translator/lib/SPIRV/SPIRVInternal.h b/llpc/translator/lib/SPIRV/SPIRVInternal.h index 7da794f819..2452d3b56c 100644 --- a/llpc/translator/lib/SPIRV/SPIRVInternal.h +++ b/llpc/translator/lib/SPIRV/SPIRVInternal.h @@ -468,7 +468,8 @@ union ShaderBlockMetadata { unsigned IsPointer : 1; // Whether it is a pointer unsigned IsStruct : 1; // Whether it is a structure unsigned IsAccelerationStructure : 1; // Whether it is an acceleration structure - unsigned Unused : 16; + unsigned Aliased : 1; // Whether "Aliased" decoration is present + unsigned Unused : 15; }; uint64_t U64All; }; @@ -484,6 +485,7 @@ struct ShaderBlockDecorate { bool Volatile; // Whether "volatile" qualifier is present bool NonWritable; // Whether "readonly" qualifier is present bool NonReadable; // Whether "writeonly" qualifier is present + bool Aliased; // Whether "Aliased" qualifier is present }; /// Flags used for floating-point control diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index d8f2e23b00..f65b03279d 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -1,4 +1,4 @@ -//===- SPIRVReader.cpp - Converts SPIR-V to LLVM ----------------*- C++ -*-===// +//===- SPIRVReader.cpp - Converts SPIR-V to LLVM ----------------*- C++ -*-===// // // The LLVM/SPIR-V Translator // @@ -165,6 +165,12 @@ bool isAccelerationStructureType(SPIRVType *type) { return type->isTypeAccelerationStructureKHR(); } +bool isAccelerationStructureMaybeArrayType(SPIRVType *type) { + while (type->getOpCode() == OpTypeArray || type->getOpCode() == OpTypeRuntimeArray) + type = type->getArrayElementType(); + return isAccelerationStructureType(type); +} + SPIRVWord 
getStd430TypeAlignment(SPIRVType *const spvType) { switch (spvType->getOpCode()) { case OpTypeInt: @@ -196,6 +202,8 @@ SPIRVWord getStd430TypeAlignment(SPIRVType *const spvType) { auto *columnTy = spvType->getMatrixColumnType(); return getStd430TypeAlignment(columnTy); } + case OpTypeImage: + case OpTypeSampler: case OpTypeSampledImage: return 1; default: @@ -223,6 +231,12 @@ SPIRVWord getStd430AlignedTypeSize(SPIRVType *const spvType) { accumulatedOffset = roundUpToMultiple(accumulatedOffset, memberAlign); accumulatedOffset += size; } +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 72 + // Rule 9: + // The structure may have padding at the end; the base offset of the member following the + // sub-structure is rounded up to the next multiple of the base alignment of the structure. + accumulatedOffset = roundUpToMultiple(accumulatedOffset, getStd430TypeAlignment(spvType)); +#endif return accumulatedOffset; } case OpTypeArray: { @@ -341,12 +355,17 @@ Value *SPIRVToLLVM::mapEntry(const SPIRVEntry *be, Value *v) { return m_entryMap[be]; } -unsigned SPIRVToLLVM::getBlockPredecessorCounts(BasicBlock *block, BasicBlock *predecessor) { +unsigned SPIRVToLLVM::getBlockPredecessorCounts(Function *func, BasicBlock *block, BasicBlock *predecessor) { assert(block); // This will create the map entry if it does not already exist. 
- auto it = m_blockPredecessorToCount.find({block, predecessor}); - if (it != m_blockPredecessorToCount.end()) - return it->second; + auto itfunc = m_blockPredecessorToCount.find(func); + if (itfunc != m_blockPredecessorToCount.end()) { + BlockPredecessorToCountInFunction &blockPredecessorToCountInFunc = itfunc->second; + auto itblock = blockPredecessorToCountInFunc.find({block, predecessor}); + if (itblock != blockPredecessorToCountInFunc.end()) { + return itblock->second; + } + } return 0; } @@ -360,12 +379,24 @@ bool SPIRVToLLVM::isSPIRVBuiltinVariable(GlobalVariable *gv, SPIRVBuiltinVariabl return true; } -SmallVector SPIRVToLLVM::getTranslatedValues(SPIRVValue *bv) { +SmallVector SPIRVToLLVM::getTranslatedValues(SPIRVValue *bv, Function *f, BasicBlock *bb) { + // We didn't map variable value in m_valueMap + if (bv->getOpCode() == OpVariable) { + if (!f) { + auto itNonImage = m_variableNonImageMap.find(bv); + if (itNonImage != m_variableNonImageMap.end()) + if (itNonImage->second) + return {itNonImage->second}; + } + auto it = m_variableMap.find({bv, f}); + if (it != m_variableMap.end()) + return it->second; + } return SmallVector(m_valueMap.lookup(bv)); } -Value *SPIRVToLLVM::getTranslatedValue(SPIRVValue *bv) { - auto values = getTranslatedValues(bv); +Value *SPIRVToLLVM::getTranslatedValue(SPIRVValue *bv, Function *f, BasicBlock *bb) { + auto values = getTranslatedValues(bv, f, bb); assert(values.size() <= 1); return values.empty() ? nullptr : values[0]; } @@ -1045,6 +1076,9 @@ Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool colum return mapType(t, FunctionType::get(rt, pt, false)); } case OpTypeImage: { + if (layout != LayoutMode::Native) + return getBuilder()->getInt8Ty(); + auto st = static_cast(t); // A buffer image is represented by a texel buffer descriptor. Any other image is represented by an array // of three image descriptors, to allow for multi-plane YCbCr conversion. 
(The f-mask part of a multi-sampled @@ -1065,6 +1099,9 @@ Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool colum } case OpTypeSampler: case OpTypeSampledImage: { + if (layout != LayoutMode::Native) + return getBuilder()->getInt8Ty(); + // Get sampler type. // A sampler is represented by a struct containing the sampler itself, and the convertingSamplerIdx, an i32 // that is either 0 or the 1-based index into the converting samplers. @@ -1229,7 +1266,7 @@ void SPIRVToLLVM::setLLVMLoopMetadata(SPIRVLoopMerge *lm, BranchInst *bi) { } SmallVector SPIRVToLLVM::transValueMulti(SPIRVValue *bv, Function *f, BasicBlock *bb, bool createPlaceHolder) { - auto values = m_valueMap.lookup(bv); + auto values = getTranslatedValues(bv, f, bb); if (!values.empty() && (!m_placeholderMap.count(bv) || createPlaceHolder)) return SmallVector(values); @@ -2933,7 +2970,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s Value *SPIRVToLLVM::transLoadImage(SPIRVValue *spvImageLoadPtr) { SPIRVType *spvElementTy = spvImageLoadPtr->getType()->getPointerElementType(); Type *elementTy = transType(spvElementTy, 0, false, false, LayoutMode::Native); - Value *base = transImagePointer(spvImageLoadPtr); + BasicBlock *bb = getBuilder()->GetInsertBlock(); + Value *base = transValue(spvImageLoadPtr, bb->getParent(), bb); return loadImageSampler(elementTy, base); } @@ -3332,11 +3370,12 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue * // // @param spvValue : A SPIR-V value. 
SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { - SPIRVType *baseType = nullptr; + SPIRVValue *spvBase = nullptr; + SPIRVType *spvBaseType = nullptr; SPIRVType *spvAccessType = nullptr; + SPIRVType *spvResultType = nullptr; + std::vector spvIndices; SPIRVStorageClassKind storageClass = StorageClassMax; - SPIRVValue *baseValue = nullptr; - std::vector indices; bool inBound = false; bool hasPtrIndex = false; @@ -3344,18 +3383,21 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { assert((spvValue->getOpCode() == OpAccessChain) || (spvValue->getOpCode() == OpInBoundsAccessChain) || (spvValue->getOpCode() == OpPtrAccessChain) || (spvValue->getOpCode() == OpInBoundsPtrAccessChain)); auto spvAccessChain = static_cast(spvValue); - baseType = spvAccessChain->getBase()->getType(); - spvAccessType = spvAccessChain->getBase()->getType()->getPointerElementType(); - baseValue = spvAccessChain->getBase(); - indices = spvAccessChain->getIndices(); + spvBase = spvAccessChain->getBase(); + spvBaseType = spvBase->getType(); + spvAccessType = spvBaseType->getPointerElementType(); + spvResultType = spvAccessChain->getType()->getPointerElementType(); + spvIndices = spvAccessChain->getIndices(); hasPtrIndex = spvAccessChain->hasPtrIndex(); inBound = spvAccessChain->isInBounds(); } - if (baseType->isTypePointer() || baseType->isTypeForwardPointer()) { - storageClass = static_cast(baseType)->getStorageClass(); + if (spvBaseType->isTypePointer() || spvBaseType->isTypeForwardPointer()) { + storageClass = static_cast(spvBaseType)->getStorageClass(); } LayoutMode layout = LayoutMode::Native; + BasicBlock *bb = getBuilder()->GetInsertBlock(); + Function *f = bb->getParent(); // Special handling for UniformConstant if the ultimate element type is image/sampler/sampledimage. 
if (storageClass == StorageClassUniformConstant) { @@ -3368,170 +3410,269 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { case OpTypeImage: case OpTypeSampler: case OpTypeSampledImage: - return {transOpAccessChainForImage(static_cast(spvValue))}; + break; default: layout = isAccelerationStructureType(spvUltimateElementType) ? LayoutMode::Explicit : LayoutMode::Std430; break; } } - // Non-image-related handling. - Value *base = transValue(baseValue, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - auto srcIndices = transValue(indices, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + // Special for UniformConstant: determine whether result/base is the mixed image/non-image case + bool baseHasImage = false; + bool baseHasNonImage = false; + bool resultHasImage = false; + bool resultHasNonImage = false; + if (storageClass == StorageClassUniformConstant) { + auto detectImage = [](SPIRVType *type) -> std::pair { + bool hasImage = false; + bool hasNonImage = false; + SmallVector elementWorklist; + elementWorklist.push_back(type); + while (!elementWorklist.empty()) { + SPIRVType *spvTy = elementWorklist.pop_back_val(); + switch (spvTy->getOpCode()) { + case OpTypeImage: + case OpTypeSampler: + case OpTypeSampledImage: + hasImage = true; + break; + case OpTypeArray: + case OpTypeRuntimeArray: + elementWorklist.push_back(spvTy->getArrayElementType()); + break; + case OpTypeStruct: + for (int i = 0, e = spvTy->getStructMemberCount(); i < e; i++) + elementWorklist.push_back(spvTy->getStructMemberType(i)); + hasNonImage = true; + break; + default: + hasNonImage = true; + } + } + return std::make_pair(hasImage, hasNonImage); + }; + + auto pair = detectImage(spvResultType); + resultHasImage = pair.first; + resultHasNonImage = pair.second; + pair = detectImage(spvAccessType); + baseHasImage = pair.first; + baseHasNonImage = pair.second; + } - truncConstantIndex(srcIndices, 
getBuilder()->GetInsertBlock()); + // Translate the base variable and indices + auto fullBase = transValueMulti(spvBase, f, bb); + auto srcIndices = transValue(spvIndices, f, bb); + truncConstantIndex(srcIndices, bb); if (!hasPtrIndex) srcIndices.insert(srcIndices.begin(), getBuilder()->getInt32(0)); - const SPIRVStorageClassKind pointerStorageClass = baseType->getPointerStorageClass(); + SmallVector result; - const bool typeMaybeRemapped = - isStorageClassExplicitlyLaidOut(m_bm, pointerStorageClass) || pointerStorageClass == StorageClassUniformConstant; + // First, translate the access chain for any non-image parts. + if (!resultHasImage) { + Value *base = fullBase[0]; + const SPIRVStorageClassKind pointerStorageClass = spvBaseType->getPointerStorageClass(); - Type *basePointeeType = nullptr; - { basePointeeType = getPointeeType(baseValue, layout); } + const bool typeMaybeRemapped = isStorageClassExplicitlyLaidOut(m_bm, pointerStorageClass) || + pointerStorageClass == StorageClassUniformConstant; - SmallVector gepIndices; + Type *basePointeeType = nullptr; + { basePointeeType = getPointeeType(spvBase, layout); } - if (baseType->isTypeForwardPointer()) { - baseType = static_cast(baseType)->getPointer(); - } - assert(baseType->isTypePointer()); - gepIndices.push_back(srcIndices[0]); + SmallVector gepIndices; - if (baseType->isTypePointer()) - spvAccessType = baseType->getPointerElementType(); + if (spvBaseType->isTypeForwardPointer()) { + spvBaseType = static_cast(spvBaseType)->getPointer(); + } + assert(spvBaseType->isTypePointer()); + gepIndices.push_back(srcIndices[0]); + + if (spvBaseType->isTypePointer()) + spvAccessType = spvBaseType->getPointerElementType(); - auto flushGep = [&]() { - if (gepIndices.size() == 1) { - if (auto *constant = dyn_cast(gepIndices[0])) { - if (constant->getZExtValue() == 0) - return; // no-op + auto flushGep = [&]() { + if (gepIndices.size() == 1) { + if (auto *constant = dyn_cast(gepIndices[0])) { + if (constant->getZExtValue() == 
0) + return; // no-op + } } - } - if (inBound) - base = getBuilder()->CreateInBoundsGEP(basePointeeType, base, gepIndices); - else - base = getBuilder()->CreateGEP(basePointeeType, base, gepIndices); + if (inBound) + base = getBuilder()->CreateInBoundsGEP(basePointeeType, base, gepIndices); + else + base = getBuilder()->CreateGEP(basePointeeType, base, gepIndices); - gepIndices.clear(); - gepIndices.push_back(getBuilder()->getInt32(0)); - }; + gepIndices.clear(); + gepIndices.push_back(getBuilder()->getInt32(0)); + }; - // Run over the indices and map the SPIR-V level indices to LLVM indices, which may be different because the LLVM - // types may contain manual padding fields to model the power of Vulkan's layout options. - // Additionally, break up the GEP sequence to handle some special cases like row major matrices. - for (Value *index : ArrayRef(srcIndices).drop_front()) { - switch (spvAccessType->getOpCode()) { - case OpTypeStruct: { - ConstantInt *constIndex = cast(index); - const uint64_t origMemberIndex = constIndex->getZExtValue(); - Type *castType = nullptr; - - if (typeMaybeRemapped) { - if (isRemappedTypeElements(spvAccessType)) { - const uint64_t remappedMemberIndex = lookupRemappedTypeElements(spvAccessType, origMemberIndex); - constIndex = getBuilder()->getInt32(remappedMemberIndex); + // Run over the indices and map the SPIR-V level indices to LLVM indices, which may be different because the LLVM + // types may contain manual padding fields to model the power of Vulkan's layout options. + // Additionally, break up the GEP sequence to handle some special cases like row major matrices. 
+ for (Value *index : ArrayRef(srcIndices).drop_front()) { + switch (spvAccessType->getOpCode()) { + case OpTypeStruct: { + ConstantInt *constIndex = cast(index); + const uint64_t origMemberIndex = constIndex->getZExtValue(); + Type *castType = nullptr; + + if (typeMaybeRemapped) { + if (isRemappedTypeElements(spvAccessType)) { + const uint64_t remappedMemberIndex = lookupRemappedTypeElements(spvAccessType, origMemberIndex); + constIndex = getBuilder()->getInt32(remappedMemberIndex); + } + + // If the struct member was actually overlapping another struct member, we need a split here. + const auto structIndexPair = std::make_pair(spvAccessType, origMemberIndex); + + if (m_overlappingStructTypeWorkaroundMap.count(structIndexPair) > 0) + castType = m_overlappingStructTypeWorkaroundMap[structIndexPair]; } - // If the struct member was actually overlapping another struct member, we need a split here. - const auto structIndexPair = std::make_pair(spvAccessType, origMemberIndex); + gepIndices.push_back(constIndex); + + if (castType) { + flushGep(); + basePointeeType = castType; + base = getBuilder()->CreateBitCast(base, + basePointeeType->getPointerTo(base->getType()->getPointerAddressSpace())); + } - if (m_overlappingStructTypeWorkaroundMap.count(structIndexPair) > 0) - castType = m_overlappingStructTypeWorkaroundMap[structIndexPair]; + spvAccessType = spvAccessType->getStructMemberType(origMemberIndex); + break; } + case OpTypeArray: + case OpTypeRuntimeArray: { + gepIndices.push_back(index); - gepIndices.push_back(constIndex); + if (typeMaybeRemapped && isRemappedTypeElements(spvAccessType)) { + // If we have padding in an array, we inserted a struct to add that + // padding, and so we need an extra constant 0 index. 
+ gepIndices.push_back(getBuilder()->getInt32(0)); + } - if (castType) { - flushGep(); - basePointeeType = castType; - base = - getBuilder()->CreateBitCast(base, basePointeeType->getPointerTo(base->getType()->getPointerAddressSpace())); + spvAccessType = spvAccessType->getArrayElementType(); + break; } + case OpTypeMatrix: { + // Matrices are represented as an array of columns. + Type *const matrixType = GetElementPtrInst::getIndexedType(basePointeeType, gepIndices); + assert(matrixType && matrixType->isArrayTy()); - spvAccessType = spvAccessType->getStructMemberType(origMemberIndex); - break; - } - case OpTypeArray: - case OpTypeRuntimeArray: { - gepIndices.push_back(index); + if (typeMaybeRemapped && isTypeWithPadRowMajorMatrix(matrixType)) { + // We have a row major matrix, we need to split the access chain here to handle it. + flushGep(); - if (typeMaybeRemapped && isRemappedTypeElements(spvAccessType)) { - // If we have padding in an array, we inserted a struct to add that - // padding, and so we need an extra constant 0 index. - gepIndices.push_back(getBuilder()->getInt32(0)); - } + auto pair = createLaunderRowMajorMatrix(matrixType, base); + basePointeeType = pair.first; + base = pair.second; - spvAccessType = spvAccessType->getArrayElementType(); - break; - } - case OpTypeMatrix: { - // Matrices are represented as an array of columns. - Type *const matrixType = GetElementPtrInst::getIndexedType(basePointeeType, gepIndices); - assert(matrixType && matrixType->isArrayTy()); + gepIndices.push_back(index); + } else { + gepIndices.push_back(index); + if (matrixType->getArrayElementType()->isStructTy()) { + // If the type of the column is a struct we had to add padding to align, so need a further index. + gepIndices.push_back(getBuilder()->getInt32(0)); + } + } - if (typeMaybeRemapped && isTypeWithPadRowMajorMatrix(matrixType)) { - // We have a row major matrix, we need to split the access chain here to handle it. 
+ spvAccessType = spvAccessType->getMatrixColumnType(); + break; + } + case OpTypeVector: { + gepIndices.push_back(index); + spvAccessType = spvAccessType->getVectorComponentType(); + break; + } + case OpTypeCooperativeMatrixKHR: { flushGep(); + auto use = spvAccessType->getCooperativeMatrixKHRUse(); + unsigned rows = spvAccessType->getCooperativeMatrixKHRRows(); + unsigned columns = spvAccessType->getCooperativeMatrixKHRColumns(); + spvAccessType = spvAccessType->getCooperativeMatrixKHRComponentType(); + basePointeeType = transType(spvAccessType); + lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessType); + lgc::CooperativeMatrixLayout layout = + getCooperativeMatrixKHRLayout(static_cast(use), elemType, rows, columns); + + std::string mangledName(LlpcName::SpirvCooperativeMatrixProxy); + Value *args[] = { + base, + getBuilder()->getInt32((unsigned)elemType), + getBuilder()->getInt32((unsigned)layout), + }; + Type *retType = basePointeeType->getPointerTo(base->getType()->getPointerAddressSpace()); + appendTypeMangling(retType, args, mangledName); + base = getBuilder()->CreateNamedCall(mangledName, retType, args, {Attribute::ReadNone, Attribute::NoUnwind}); + + gepIndices[0] = index; + break; + } + default: + llvm_unreachable("unhandled type in access chain"); + } + } - auto pair = createLaunderRowMajorMatrix(matrixType, base); - basePointeeType = pair.first; - base = pair.second; + Type *finalPointeeType = GetElementPtrInst::getIndexedType(basePointeeType, gepIndices); + flushGep(); - gepIndices.push_back(index); - } else { - gepIndices.push_back(index); - if (matrixType->getArrayElementType()->isStructTy()) { - // If the type of the column is a struct we had to add padding to align, so need a further index. - gepIndices.push_back(getBuilder()->getInt32(0)); - } + tryAddAccessChainRetType(spvValue, finalPointeeType); + result.push_back(base); + } + + // Second, translate the access chain for any mixed-image parts. 
+ if (resultHasImage) { + Value *base = fullBase[baseHasNonImage ? 1 : 0]; + + if (spvIndices.empty()) { + result.push_back(base); + } else { + // 'proxyType' is the replaced type for struct/array type with image/sampler member. + // In which, image/sampler member is replaced by int8 type, and non-image member is replaced by empty struct. + Type *proxyType = transType(spvAccessType, 0, true, true, layout); + SPIRVTypeContext ctx(spvAccessType, 0, true, true, layout); + auto it = m_imageTypeMap.find(ctx.asTuple()); + if (it != m_imageTypeMap.end()) + proxyType = it->second; + + // Calculate the offset: + // 1. Calculate the current accessed member in the proxyType: offset = offsetof(proxyType, proxyType[srcIndices]); + // 2. Correct offset if 'proxyType' is struct: + // a. If it has pre OpAccessChain, add the pre offset value from pre OpAccessChain + // b. If not have pre OpAccessChain, the 'offset' value won't be modified. + // Because during transValueMultiWithOpcode(spvBase), we append a constant value 0, so this + // always add 0. + // c. If 'proxyType' is array type(which means the `baseHasNonImage` is false), we didn't append 0 for base + // variable, so should NOT correct the offset value. + Value *proxy = getBuilder()->CreateGEP(proxyType, ConstantPointerNull::get(getBuilder()->getPtrTy()), srcIndices); + Value *offset = getBuilder()->CreatePtrToInt(proxy, getBuilder()->getInt32Ty()); + + // Translate the image/sampler member pointer. 
+ SPIRVType *spvElementType = spvAccessType; + + SmallVector elementWorklist; + elementWorklist.push_back(spvElementType); + while (!elementWorklist.empty()) { + spvElementType = elementWorklist.pop_back_val(); + if (spvElementType->getOpCode() == OpTypeImage || spvElementType->getOpCode() == OpTypeSampler || + spvElementType->getOpCode() == OpTypeSampledImage) + break; + else if (spvElementType->getOpCode() == OpTypeArray || spvElementType->getOpCode() == OpTypeRuntimeArray) + elementWorklist.push_back(spvElementType->getArrayElementType()); + else if (spvElementType->getOpCode() == OpTypeStruct) + for (int i = 0, e = spvElementType->getStructMemberCount(); i < e; i++) + elementWorklist.push_back(spvElementType->getStructMemberType(i)); } - spvAccessType = spvAccessType->getMatrixColumnType(); - break; - } - case OpTypeVector: { - gepIndices.push_back(index); - spvAccessType = spvAccessType->getVectorComponentType(); - break; - } - case OpTypeCooperativeMatrixKHR: { - flushGep(); - auto use = spvAccessType->getCooperativeMatrixKHRUse(); - unsigned rows = spvAccessType->getCooperativeMatrixKHRRows(); - unsigned columns = spvAccessType->getCooperativeMatrixKHRColumns(); - spvAccessType = spvAccessType->getCooperativeMatrixKHRComponentType(); - basePointeeType = transType(spvAccessType); - lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessType); - lgc::CooperativeMatrixLayout layout = - getCooperativeMatrixKHRLayout(static_cast(use), elemType, rows, columns); - - std::string mangledName(LlpcName::SpirvCooperativeMatrixProxy); - Value *args[] = { - base, - getBuilder()->getInt32((unsigned)elemType), - getBuilder()->getInt32((unsigned)layout), - }; - Type *retType = basePointeeType->getPointerTo(base->getType()->getPointerAddressSpace()); - appendTypeMangling(retType, args, mangledName); - base = getBuilder()->CreateNamedCall(mangledName, retType, args, {Attribute::ReadNone, Attribute::NoUnwind}); - - gepIndices[0] = index; - break; - } - default: - 
llvm_unreachable("unhandled type in access chain"); + Type *imageSamplerType = transType(spvElementType); + result.push_back(indexDescPtr(imageSamplerType, base, offset)); } } - Type *finalPointeeType = GetElementPtrInst::getIndexedType(basePointeeType, gepIndices); - flushGep(); - - tryAddAccessChainRetType(spvValue, finalPointeeType); - return {base}; + return result; } // ===================================================================================================================== @@ -3542,38 +3683,6 @@ template <> SmallVector SPIRVToLLVM::transValueMultiWithOpcodegetBase()->getType()->getPointerElementType(); - std::vector spvIndicesVec = spvAccessChain->getIndices(); - ArrayRef spvIndices = spvIndicesVec; - Value *base = transImagePointer(spvAccessChain->getBase()); - - if (spvIndices.empty()) - return base; - - Value *index = transValue(spvIndices[0], getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - spvIndices = spvIndices.slice(1); - spvElementType = spvElementType->getArrayElementType(); - - while (spvElementType->getOpCode() == OpTypeArray) { - index = getBuilder()->CreateMul( - index, getBuilder()->getInt32(static_cast(spvElementType)->getLength()->getZExtIntValue())); - if (!spvIndices.empty()) { - index = getBuilder()->CreateAdd(index, transValue(spvIndices[0], getBuilder()->GetInsertBlock()->getParent(), - getBuilder()->GetInsertBlock())); - spvIndices = spvIndices.slice(1); - } - spvElementType = spvElementType->getArrayElementType(); - } - - Type *elementTy = transType(spvElementType, 0, false, false, LayoutMode::Native); - return indexDescPtr(elementTy, base, index); -} - // ===================================================================================================================== // Apply an array index to a pointer to array of image/sampler/sampledimage. // A pointer to sampledimage is in fact a structure containing pointer to image and pointer to sampler. 
@@ -3706,6 +3815,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s // // @param spvValue : A SPIR-V value. template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + m_hasDemoteToHelper = true; return getBuilder()->CreateDemoteToHelperInvocation(); } @@ -4155,6 +4265,12 @@ Value *SPIRV::SPIRVToLLVM::createTraceRayDialectOp(SPIRVValue *const spvValue) { auto accelStructAsI64 = getBuilder()->CreateBitCast(accelStruct, getBuilder()->getInt64Ty()); Type *payloadTy = transType(spvOperands[10]->getType()->getPointerElementType()); + + // Wrap payload with struct, PAQ handling expects a struct type. + // FIXME: We should support non-struct types for PAQ + if (getRaytracingContext()->isContinuationsMode() && !payloadTy->isStructTy()) + payloadTy = StructType::get(*m_context, {payloadTy}, ""); + auto paq = getPaqFromSize(getBuilder()->getContext(), alignTo(m_m->getDataLayout().getTypeAllocSize(payloadTy), 4)); CallInst *call = nullptr; @@ -4184,6 +4300,11 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRV Type *callableDataTy = transType(spvOperands[1]->getType()->getPointerElementType()); unsigned dataByteSize = alignTo(m_m->getDataLayout().getTypeAllocSize(callableDataTy), 4); + // Wrap payload with struct, PAQ handling expects a struct type. + // FIXME: We should support non-struct types for PAQ + if (getRaytracingContext()->isContinuationsMode() && !callableDataTy->isStructTy()) + callableDataTy = StructType::get(*m_context, {callableDataTy}, ""); + auto *call = getBuilder()->create(shaderIndex, callableData, dataByteSize); // Store a poison value as metadata to track callable data type. ContHelper::setPayloadTypeMetadata(call, callableDataTy); @@ -4838,15 +4959,50 @@ Constant *SPIRVToLLVM::transInitializer(SPIRVValue *const spvValue, Type *const // Handle OpVariable. // // @param spvValue : A SPIR-V value. 
-template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { - return transVariable(spvValue); +template <> +SmallVector SPIRVToLLVM::transValueMultiWithOpcode(SPIRVValue *const spvValue, Function *f, + BasicBlock *bb) { + auto it = m_variableMap.find({spvValue, f}); + if (it != m_variableMap.end()) + return it->second; + + auto itNonImage = m_variableNonImageMap.find(spvValue); + if (itNonImage == m_variableNonImageMap.end()) + itNonImage = m_variableNonImageMap.try_emplace(spvValue, transVariableNonImage(spvValue)).first; + + SmallVector values; + if (itNonImage->second) + values.push_back(itNonImage->second); + + auto spvVar = static_cast(spvValue); + const SPIRVStorageClassKind storageClass = spvVar->getStorageClass(); + bool variableHasImage = false; + if (storageClass == StorageClassUniformConstant) { + SPIRVType *spvElementType = spvVar->getType()->getPointerElementType(); + while (spvElementType->getOpCode() == OpTypeArray || spvElementType->getOpCode() == OpTypeRuntimeArray) + spvElementType = spvElementType->getArrayElementType(); + if (spvElementType->getOpCode() == OpTypeImage || spvElementType->getOpCode() == OpTypeSampler || + spvElementType->getOpCode() == OpTypeSampledImage) { + variableHasImage = true; + } + } + + // Add image descriptor parts if required + if (f && variableHasImage) { + IRBuilderBase::InsertPointGuard ipg(*getBuilder()); + getBuilder()->SetInsertPointPastAllocas(f); + values.push_back(transImagePointer(spvVar, spvVar->getMemObjType())); + } + + m_variableMap.try_emplace({spvValue, f}, values); + return values; } // ===================================================================================================================== -// Handle OpVariable/OpUntypedVariableKHR. +// Handle the non-image/sampler aspect of OpVariable/OpUntypedVariableKHR. // // @param spvValue : A SPIR-V value. 
-Value *SPIRVToLLVM::transVariable(SPIRVValue *const spvValue) { +Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { auto spvVar = static_cast(spvValue); const SPIRVStorageClassKind storageClass = spvVar->getStorageClass(); SPIRVType *spvVarType = spvVar->getMemObjType(); @@ -5634,7 +5790,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu getBuilder()->SetInsertPoint(bb); updateDebugLoc(bv, f); } - return mapValue(bv, transValueWithOpcode(bv)); + return transValueMultiWithOpcode(bv, f, bb); default: // do nothing @@ -5676,7 +5832,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu else if (br->getBasicBlock()->getLoopMerge()) setLLVMLoopMetadata(br->getBasicBlock()->getLoopMerge(), bi); - recordBlockPredecessor(successor, bb); + recordBlockPredecessor(f, successor, bb); return mapValue(bv, bi); } @@ -5688,9 +5844,11 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu // int/float arguments as the branch condition if (SPIRVWorkaroundBadSPIRV) { if (c->getType()->isFloatTy()) - c = new llvm::FCmpInst(*bb, llvm::CmpInst::FCMP_ONE, c, llvm::ConstantFP::get(c->getType(), 0.0)); + c = llvm::CmpInst::Create(llvm::Instruction::FCmp, llvm::CmpInst::FCMP_ONE, c, + llvm::ConstantFP::get(c->getType(), 0.0), "", bb); else if (c->getType()->isIntegerTy() && !c->getType()->isIntegerTy(1)) - c = new llvm::ICmpInst(*bb, llvm::CmpInst::ICMP_NE, c, llvm::ConstantInt::get(c->getType(), 0)); + c = llvm::CmpInst::Create(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, c, + llvm::ConstantInt::get(c->getType(), 0), "", bb); } auto trueSuccessor = cast(transValue(br->getTrueLabel(), f, bb)); @@ -5710,8 +5868,8 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu else if (br->getBasicBlock()->getLoopMerge()) setLLVMLoopMetadata(br->getBasicBlock()->getLoopMerge(), bc); - recordBlockPredecessor(trueSuccessor, bb); - recordBlockPredecessor(falseSuccessor, bb); + 
recordBlockPredecessor(f, trueSuccessor, bb); + recordBlockPredecessor(f, falseSuccessor, bb); return mapValue(bv, bc); } @@ -5824,7 +5982,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu auto bs = static_cast(bv); auto select = transValue(bs->getSelect(), f, bb); auto defaultSuccessor = dyn_cast(transValue(bs->getDefault(), f, bb)); - recordBlockPredecessor(defaultSuccessor, bb); + recordBlockPredecessor(f, defaultSuccessor, bb); // OpSwitch can branch with OpUndef as condition. The selected jump target is undefined. // In LLVM IR, SwitchInst with undef value is fully UB. @@ -5843,7 +6001,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu auto successor = cast(transValue(label, f, bb)); ls->addCase(ConstantInt::get(dyn_cast(select->getType()), literal), successor); - recordBlockPredecessor(successor, bb); + recordBlockPredecessor(f, successor, bb); }); return mapValue(bv, ls); } @@ -6090,12 +6248,6 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu SmallVector args; for (SPIRVValue *bArg : bc->getArgumentValues()) { Value *arg = transValue(bArg, f, bb); - if (!arg) { - // This arg is a variable that is (array of) image/sampler/sampledimage. - // Materialize it. 
- assert(bArg->getOpCode() == OpVariable); - arg = transImagePointer(bArg); - } args.push_back(arg); } auto call = CallInst::Create(transFunction(bc->getFunction()), args, "", bb); @@ -6820,7 +6972,7 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { for (unsigned i = 0; i != initialNumIncoming; ++i) { BasicBlock *predecessor = phi.getIncomingBlock(i); Value *incomingValue = phi.getIncomingValue(i); - const unsigned numIncomingArcsForPred = getBlockPredecessorCounts(&bb, predecessor); + const unsigned numIncomingArcsForPred = getBlockPredecessorCounts(f, &bb, predecessor); for (unsigned j = 1; j < numIncomingArcsForPred; ++j) phi.addIncoming(incomingValue, predecessor); @@ -6828,7 +6980,7 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { } } - m_blockPredecessorToCount.clear(); + m_blockPredecessorToCount.erase(f); auto getContArgTy = [&](SPIRVType *argTy) { if (argTy->isTypePointer()) { @@ -6841,18 +6993,13 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { return ContArgTy(transType(argTy)); }; - // Special handling for GPURT intrinsic function _Amd* and _cont_ - if (f->getName().starts_with("_Amd") || f->getName().starts_with("_cont_")) { - SmallVector argTys; - - for (unsigned i = 0; i < bf->getNumArguments(); ++i) { - auto argTy = bf->getArgument(i)->getType(); - argTys.push_back(getContArgTy(argTy)); - } - - ContFuncTy funcTys(getContArgTy(bf->getType()), argTys); - funcTys.writeMetadata(f); + SmallVector argTys; + for (unsigned i = 0; i < bf->getNumArguments(); ++i) { + auto argTy = bf->getArgument(i)->getType(); + argTys.push_back(getContArgTy(argTy)); } + ContFuncTy funcTys(getContArgTy(bf->getType()), argTys); + funcTys.writeMetadata(f); return f; } @@ -8565,6 +8712,8 @@ bool SPIRVToLLVM::transMetadata() { fragmentMode.earlyFragmentTests = true; } + fragmentMode.waveOpsRequireHelperLanes = m_maximallyReconverges && m_hasDemoteToHelper; + Pipeline::setFragmentShaderMode(*m_m, fragmentMode); } else if (execModel == 
ExecutionModelGLCompute || execModel == ExecutionModelTaskEXT) { @@ -8816,7 +8965,8 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { std::vector mDs; mDs.push_back(ConstantAsMetadata::get(md)); auto mdNode = MDNode::get(*m_context, mDs); - gv->addMetadata(gSPIRVMD::InOut, *mdNode); + if (!gv->hasMetadata(gSPIRVMD::InOut)) + gv->addMetadata(gSPIRVMD::InOut, *mdNode); } else if (as == SPIRAS_Uniform) { // Translate decorations of blocks @@ -8876,7 +9026,8 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { resMDs.push_back(ConstantAsMetadata::get(ConstantInt::get(int32Ty, descSet))); resMDs.push_back(ConstantAsMetadata::get(ConstantInt::get(int32Ty, binding))); auto resMdNode = MDNode::get(*m_context, resMDs); - gv->addMetadata(gSPIRVMD::Resource, *resMdNode); + if (!gv->hasMetadata(gSPIRVMD::Resource)) + gv->addMetadata(gSPIRVMD::Resource, *resMdNode); // Build block metadata const bool isUniformBlock = bv->getType()->getPointerStorageClass() != StorageClassStorageBuffer && @@ -8887,6 +9038,9 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { if (bv->hasDecorate(DecorationOffset, 0, &atomicCounterOffset)) { blockDec.Offset = atomicCounterOffset; } + if (bv->hasDecorate(DecorationAliased)) { + blockDec.Aliased = true; + } blockDec.NonWritable = isUniformBlock; Type *blockMdTy = nullptr; @@ -8896,9 +9050,11 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { blockMDs.push_back(ConstantAsMetadata::get(blockMd)); auto blockMdNode = MDNode::get(*m_context, blockMDs); if (bv->getType()->getPointerStorageClass() == StorageClassAtomicCounter) { - gv->addMetadata(gSPIRVMD::AtomicCounter, *blockMdNode); + if (!gv->hasMetadata(gSPIRVMD::AtomicCounter)) + gv->addMetadata(gSPIRVMD::AtomicCounter, *blockMdNode); } else { - gv->addMetadata(gSPIRVMD::Block, *blockMdNode); + if (!gv->hasMetadata(gSPIRVMD::Block)) + gv->addMetadata(gSPIRVMD::Block, *blockMdNode); } } else if 
(bv->getType()->getPointerStorageClass() == StorageClassTaskPayloadWorkgroupEXT) { // Setup metadata for task payload @@ -9447,6 +9603,7 @@ Constant *SPIRVToLLVM::buildShaderBlockMetadata(SPIRVType *bt, ShaderBlockDecora blockMd.Volatile = blockDec.Volatile; blockMd.NonWritable = blockDec.NonWritable; blockMd.NonReadable = blockDec.NonReadable; + blockMd.Aliased = blockDec.Aliased; mdTy = Type::getInt64Ty(*m_context); return ConstantInt::get(mdTy, blockMd.U64All); @@ -9503,6 +9660,7 @@ Constant *SPIRVToLLVM::buildShaderBlockMetadata(SPIRVType *bt, ShaderBlockDecora blockMd.Volatile = blockDec.Volatile; blockMd.NonWritable = blockDec.NonWritable; blockMd.NonReadable = blockDec.NonReadable; + blockMd.Aliased = blockDec.Aliased; std::vector mdValues; mdValues.push_back(ConstantInt::get(int32Ty, stride)); @@ -9574,6 +9732,7 @@ Constant *SPIRVToLLVM::buildShaderBlockMetadata(SPIRVType *bt, ShaderBlockDecora ShaderBlockMetadata blockMd = {}; blockMd.offset = blockDec.Offset; blockMd.IsStruct = true; + blockMd.Aliased = blockDec.Aliased; // Construct structure metadata std::vector mdTys; @@ -9599,6 +9758,7 @@ Constant *SPIRVToLLVM::buildShaderBlockMetadata(SPIRVType *bt, ShaderBlockDecora blockMd.Volatile = blockDec.Volatile; blockMd.NonWritable = blockDec.NonWritable; blockMd.NonReadable = blockDec.NonReadable; + blockMd.Aliased = blockDec.Aliased; mdTy = Type::getInt64Ty(*m_context); return ConstantInt::get(mdTy, blockMd.U64All); @@ -10693,7 +10853,7 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { // Update indexOfBuffer for block array, the N array-elements are captured by N consecutive buffers. 
SPIRVType *bt = bv->getType()->getPointerElementType(); if (bt->isTypeArray()) { - auto output = cast(getTranslatedValue(bv)); + auto output = cast(getTranslatedValue(bv, nullptr, nullptr)); MDNode *metaNode = output->getMetadata(gSPIRVMD::InOut); assert(metaNode); auto elemMeta = mdconst::dyn_extract(metaNode->getOperand(0)); diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index 85d3085c01..04e5929cd6 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -106,13 +106,14 @@ class SPIRVToLLVM { Value *transAtomicRMW(SPIRVValue *, const AtomicRMWInst::BinOp); Constant *transInitializer(SPIRVValue *, Type *); template Value *transValueWithOpcode(SPIRVValue *); + template Value *transValueWithOpcode(SPIRVValue *, Function *f, BasicBlock *bb); template SmallVector transValueMultiWithOpcode(SPIRVValue *); + template SmallVector transValueMultiWithOpcode(SPIRVValue *, Function *f, BasicBlock *bb); Value *transLoadImage(SPIRVValue *spvImageLoadPtr); Value *loadImageSampler(Type *elementTy, Value *base); Value *transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *elementTy = nullptr); Value *getDescPointerAndStride(lgc::ResourceNodeType resType, unsigned descriptorSet, unsigned binding, lgc::ResourceNodeType searchType); - Value *transOpAccessChainForImage(SPIRVAccessChainBase *spvAccessChain); Value *indexDescPtr(Type *elementTy, Value *base, Value *index); Value *transGroupArithOp(lgc::Builder::GroupArithOp, SPIRVValue *); @@ -136,7 +137,7 @@ class SPIRVToLLVM { Instruction *transBarrierFence(SPIRVInstruction *bi, BasicBlock *bb); Value *transString(const SPIRVString *spvValue); Value *transDebugPrintf(SPIRVInstruction *bi, const ArrayRef spvValues, Function *func, BasicBlock *bb); - Value *transVariable(SPIRVValue *const spvValue); + Value *transVariableNonImage(SPIRVValue *const spvValue); SmallVector transAccessChain(SPIRVValue *const spvValue); Value 
*transArrayLength(SPIRVValue *const spvValue); // Struct used to pass information in and out of getImageDesc. @@ -211,8 +212,8 @@ class SPIRVToLLVM { // Post-process translated LLVM module to undo row major matrices. bool postProcessRowMajorMatrix(); - SmallVector getTranslatedValues(SPIRVValue *bv); - Value *getTranslatedValue(SPIRVValue *bv); + SmallVector getTranslatedValues(SPIRVValue *bv, Function *f, BasicBlock *bb); + Value *getTranslatedValue(SPIRVValue *bv, Function *f, BasicBlock *bb); // Create !lgc.xfb.state metadata void createXfbMetadata(bool hasXfbOuts); @@ -245,6 +246,7 @@ class SPIRVToLLVM { typedef DenseMap> RemappedTypeElementsMap; typedef DenseMap SPIRVAccessChainValueToLLVMRetTypeMap; typedef DenseMap SPIRVToLLVMEntryMap; + typedef DenseMap, unsigned> BlockPredecessorToCountInFunction; // A SPIRV value may be translated to a load instruction of a placeholder // global variable. This map records load instruction of these placeholders @@ -272,6 +274,8 @@ class SPIRVToLLVM { SPIRVBlockToLLVMStructMap m_blockMap; SPIRVToLLVMPlaceholderMap m_placeholderMap; SPIRVToLLVMDbgTran m_dbgTran; + DenseMap, SmallVector> m_variableMap; + DenseMap m_variableNonImageMap; // Hash map with correlation between (SPIR-V) OpAccessChain and its returned (dereferenced) type. // We have to store base type because opaque-pointers are removing information about dereferenced type. 
@@ -281,7 +285,7 @@ class SPIRVToLLVM { DenseMap m_typesWithPadMap; DenseMap m_typeToStoreSize; DenseMap, Type *> m_overlappingStructTypeWorkaroundMap; - DenseMap, unsigned> m_blockPredecessorToCount; + DenseMap m_blockPredecessorToCount; const Vkgc::ShaderModuleUsage *m_moduleUsage; GlobalVariable *m_debugOutputBuffer; @@ -294,6 +298,7 @@ class SPIRVToLLVM { bool m_requireFullQuads; bool m_maximallyReconverges = false; + bool m_hasDemoteToHelper = false; enum class LlvmMemOpType : uint8_t { IS_LOAD, IS_STORE }; struct ScratchBoundsCheckData { @@ -386,13 +391,14 @@ class SPIRVToLLVM { // Used to keep track of the number of incoming edges to a block from each // of the predecessor. - void recordBlockPredecessor(BasicBlock *block, BasicBlock *predecessorBlock) { + void recordBlockPredecessor(Function *func, BasicBlock *block, BasicBlock *predecessorBlock) { + assert(func); assert(block); assert(predecessorBlock); - m_blockPredecessorToCount[{block, predecessorBlock}] += 1; + m_blockPredecessorToCount[func][{block, predecessorBlock}] += 1; } - unsigned getBlockPredecessorCounts(BasicBlock *block, BasicBlock *predecessor); + unsigned getBlockPredecessorCounts(Function *f, BasicBlock *block, BasicBlock *predecessor); bool isSPIRVBuiltinVariable(GlobalVariable *gv, SPIRVBuiltinVariableKind *kind = nullptr); diff --git a/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp b/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp index 97a5b6673c..3eb1299c25 100644 --- a/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp @@ -876,6 +876,12 @@ Instruction *SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugIn auto GetExpression = [&](SPIRVId Id) -> DIExpression * { return transDebugInst(BM->get(Id)); }; + using LLPCDbgInstPtr = +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 492382 + DbgInstPtr; +#else + Instruction *; +#endif SPIRVWordVec Ops = DebugInst->getArguments(); switch (DebugInst->getExtOp()) { 
case SPIRVDebug::Scope: @@ -899,6 +905,7 @@ Instruction *SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugIn case SPIRVDebug::Declare: { using namespace SPIRVDebug::Operand::DebugDeclare; auto LocalVar = GetLocalVar(Ops[DebugLocalVarIdx]); + LLPCDbgInstPtr DbgInst; if (getDbgInst(Ops[VariableIdx])) { // If we don't have the variable(e.g. alloca might be promoted by mem2reg) // we should generate the following IR: @@ -908,19 +915,41 @@ Instruction *SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugIn // parameter. To work around this limitation we create a dummy temp // alloca, use it to create llvm.dbg.declare, and then remove the alloca. auto *AI = new AllocaInst(Type::getInt8Ty(M->getContext()), 0, "tmp", BB); - auto *DbgDeclare = - Builder.insertDeclare(AI, LocalVar.first, GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB); + DbgInst = Builder.insertDeclare(AI, LocalVar.first, GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB); AI->eraseFromParent(); - return DbgDeclare; + } else { + DbgInst = Builder.insertDeclare(GetValue(Ops[VariableIdx]), LocalVar.first, GetExpression(Ops[ExpressionIdx]), + LocalVar.second, BB); } - return Builder.insertDeclare(GetValue(Ops[VariableIdx]), LocalVar.first, GetExpression(Ops[ExpressionIdx]), - LocalVar.second, BB); +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 492382 + // Debug Info Format is in transition phase right now. + // If new Debug Info Format is turned ON then 'insertDeclare' will return DbgRecord. + // If new Debug Info Format is turned OFF then 'insertDeclare' will return Instruction (Intrinsic) which we are + // storing in hashMap. This part will be removed after the transition, since new DbgInfoFormat will be turned ON + // always and we will return nullptr from that point. This comment applies also to 'insertDbgValueIntrinsic' below. 
+ if (DbgInst.is()) { + return DbgInst.get(); + } else { + return nullptr; + } +#else + return DbgInst; +#endif } case SPIRVDebug::Value: { using namespace SPIRVDebug::Operand::DebugValue; auto LocalVar = GetLocalVar(Ops[DebugLocalVarIdx]); - return Builder.insertDbgValueIntrinsic(GetValue(Ops[ValueIdx]), LocalVar.first, GetExpression(Ops[ExpressionIdx]), - LocalVar.second, BB); + LLPCDbgInstPtr DbgInst = Builder.insertDbgValueIntrinsic(GetValue(Ops[ValueIdx]), LocalVar.first, + GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB); +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 492382 + if (DbgInst.is()) { + return DbgInst.get(); + } else { + return nullptr; + } +#else + return DbgInst; +#endif } default: llvm_unreachable("Unknown debug intrinsic!"); diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h index 0fedfc2e62..dbb07263b4 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEntry.h @@ -495,6 +495,12 @@ class SPIRVExecutionMode : public SPIRVAnnotation { WordLiterals.push_back(Word2); updateModuleVersion(); } + SPIRVExecutionMode(SPIRVEntry *TheTarget, SPIRVExecutionModeKind TheExecMode, SPIRVWord Word0, SPIRVWord Word1) + : SPIRVAnnotation(TheTarget, 5), ExecMode(TheExecMode) { + WordLiterals.push_back(Word0); + WordLiterals.push_back(Word1); + updateModuleVersion(); + } // Complete constructor for SubgroupSize, SubgroupsPerWorkgroup SPIRVExecutionMode(SPIRVEntry *TheTarget, SPIRVExecutionModeKind TheExecMode, SPIRVWord Code) : SPIRVAnnotation(TheTarget, 4), ExecMode(TheExecMode) { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h index f250f8b3dd..b144cebd59 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -455,7 +455,7 @@ class SPIRVBaseVariable : public SPIRVInstruction { 
return std::vector(); } - virtual void validate() const {}; + virtual void validate() const override{}; SPIRVType *getMemObjType() const { SPIRVType *spvMemType = nullptr; if (getOpCode() == OpVariable) { diff --git a/llpc/util/llpcElfWriter.cpp b/llpc/util/llpcElfWriter.cpp index b844a41867..0aba94213f 100644 --- a/llpc/util/llpcElfWriter.cpp +++ b/llpc/util/llpcElfWriter.cpp @@ -35,6 +35,7 @@ #include "llvm/BinaryFormat/MsgPackDocument.h" #include #include +#include #define DEBUG_TYPE "llpc-elf-writer" @@ -50,6 +51,9 @@ static const char *const ApiStageNames[] = {".task", ".vertex", ".hull", ". // The names of hardware shader stages used in PAL metadata, in Util::Abi::HardwareStage order. static const char *const HwStageNames[] = {".ls", ".hs", ".es", ".gs", ".vs", ".ps", ".cs"}; +// The names for spill threshold for shader. +static const char ShaderSpillThreshold[] = ".shader_spill_threshold"; + // The suffix added to the rodata sections from the cached elf bin static const char CachedRodataSectionSuffix[] = ".cached"; // The suffix added to the symbols of the rodata sections from the cached elf bin @@ -241,10 +245,30 @@ void ElfWriter::mergeMetaNote(Context *pContext, const ElfNote *pNote1, con destPipeline.getMap(true)[PalAbi::PipelineMetadataKey::NumInterpolants] = srcNumIterpIt->second; // Copy .spill_threshold - auto destSpillThreshold = destPipeline.getMap(true)[PalAbi::PipelineMetadataKey::SpillThreshold].getUInt(); - auto srcSpillThreshold = srcPipeline.getMap(true)[PalAbi::PipelineMetadataKey::SpillThreshold].getUInt(); + // Fix the issue for run noiub.nouab.task.noia.0+noiub.nouab.task.noia.2 cases: + // 1. After running noia.0 case, it will has two elfs: fs+non_fs in cache + // 2. Then run noia.2 case, which fs is same with fs@noia.0 then this function will be triggered. + // 3. 
Then it will merge non-fs generated in noia.2 and fs which is saved in the cache after noia.0 + // There will be a merge on threshold here, just merge fs_threshold@fs_in_cache not + // Pipelinelevel_threshold@fs_in_cache + unsigned srcSpillValue = USHRT_MAX; + if (pContext->getGfxIpVersion().major > 10) { + auto srcPsHwStage = srcPipeline.getMap(true)[PalAbi::PipelineMetadataKey::HardwareStages] + .getMap(true)[static_cast(Util::Abi::HardwareStage::Ps)] + .getMap(true); + auto srcSpillThreshold = &srcPsHwStage[ShaderSpillThreshold]; + if (!srcSpillThreshold->isEmpty()) { + srcSpillValue = srcPsHwStage[ShaderSpillThreshold].getUInt(); + } + } else { + // This is to revert and keep legacy behavior on gfx10 as to fix the block issue: hang on PAL for + // gfx_bench. Todo: Needs to keep same with gfx10+ + srcSpillValue = srcPipeline.getMap(true)[PalAbi::PipelineMetadataKey::SpillThreshold].getUInt(); + } + + unsigned destSpillThreshold = destPipeline.getMap(true)[PalAbi::PipelineMetadataKey::SpillThreshold].getUInt(); destPipeline.getMap(true)[PalAbi::PipelineMetadataKey::SpillThreshold] = - destDocument.getNode(std::min(srcSpillThreshold, destSpillThreshold)); + destDocument.getNode(std::min(srcSpillValue, destSpillThreshold)); // Copy .user_data_limit auto destUserDataLimit = destPipeline.getMap(true)[PalAbi::PipelineMetadataKey::UserDataLimit].getUInt(); @@ -610,6 +634,79 @@ template void ElfWriter::writeToBuffer(ElfPackage *pElf) { assert((buffer - data) == reqSize); } +// ===================================================================================================================== +// Fixes up one relocation. 
+// // @param relocIdx : The index of the relocation to fix up +// @param relocValue : The value to be added to the relocation +// @param targetSymbolIdx : The target symbol index to change this relocation to point to +// @param modifyMask : The mask to apply to the relocation value +template +void ElfWriter::fixupRelocation(unsigned relocIdx, unsigned relocValue, unsigned targetSymbolIdx, + unsigned modifyMask) { + assert(m_relocSecIdx >= 0); + assert((relocValue & ~modifyMask) == 0 && "Relocation fixup value overflow"); + uint8_t *textData = const_cast(m_sections[m_textSecIdx].data); + uint8_t *relocData = const_cast(m_sections[m_relocSecIdx].data); + + bool isRela = false; + if (m_sections[m_relocSecIdx].secHead.sh_type == SHT_RELA) + isRela = true; + else + assert(m_sections[m_relocSecIdx].secHead.sh_type == SHT_REL); + + std::variant relocPtr; + Util::Abi::RelocationType relocType; + if (isRela) { + auto reloc = &reinterpret_cast(relocData)[relocIdx]; + relocType = static_cast(reloc->r_type); + relocPtr = reloc; + } else { + auto reloc = &reinterpret_cast(relocData)[relocIdx]; + relocType = static_cast(reloc->r_type); + relocPtr = reloc; + } + + switch (relocType) { + case Util::Abi::RelocationType::Abs32: + case Util::Abi::RelocationType::Abs32Lo: + case Util::Abi::RelocationType::Rel32: + case Util::Abi::RelocationType::Rel32Lo: + if (isRela) { + auto *reloc = std::get(relocPtr); + assert((reloc->r_addend & modifyMask) == 0 && "Modifying bits should be zero"); + reloc->r_addend |= (relocValue & modifyMask); + if (targetSymbolIdx != InvalidValue) { + reloc->r_symbol = targetSymbolIdx; + } + } else { + auto *reloc = std::get(relocPtr); + unsigned *targetDword = reinterpret_cast(textData + reloc->r_offset); + assert((*targetDword & modifyMask) == 0 && "Modifying bits should be zero"); + *targetDword |= (relocValue & modifyMask); + if (targetSymbolIdx != InvalidValue) { + reloc->r_symbol = targetSymbolIdx; + } + } + break; + case Util::Abi::RelocationType::Abs32Hi: + case 
Util::Abi::RelocationType::Rel32Hi: + // Only change the relocation symbol for high part relocation if required + if (targetSymbolIdx != InvalidValue) { + if (isRela) { + auto *reloc = std::get(relocPtr); + reloc->r_symbol = targetSymbolIdx; + } else { + auto *reloc = std::get(relocPtr); + reloc->r_symbol = targetSymbolIdx; + } + } + break; + default: + llvm_unreachable("Unsupported relocation type"); + } +} + // ===================================================================================================================== // Copies ELF content from a ElfReader. // diff --git a/llpc/util/llpcElfWriter.h b/llpc/util/llpcElfWriter.h index b51e1bd5db..99dc6cc044 100644 --- a/llpc/util/llpcElfWriter.h +++ b/llpc/util/llpcElfWriter.h @@ -121,6 +121,9 @@ template class ElfWriter { void writeToBuffer(ElfPackage *elf); + void fixupRelocation(unsigned relocIdx, unsigned relocValue, unsigned targetSymbolIdx = InvalidValue, + unsigned modifyMask = 0); + private: ElfWriter(const ElfWriter &) = delete; ElfWriter &operator=(const ElfWriter &) = delete; diff --git a/llpc/util/llpcError.cpp b/llpc/util/llpcError.cpp index 2f25780c53..adeafee8a9 100644 --- a/llpc/util/llpcError.cpp +++ b/llpc/util/llpcError.cpp @@ -90,6 +90,8 @@ struct ResultErrorCategory : std::error_category { return "Unsupported"; case Result::Success: return "Success"; + case Result::RequireFullPipeline: + return "RequireFullPipeline"; default: llvm_unreachable("Invalid Result code"); return "Invalid Result code"; diff --git a/llpc/util/llpcShaderModuleHelper.cpp b/llpc/util/llpcShaderModuleHelper.cpp index cdb1aee408..981afbfcc2 100644 --- a/llpc/util/llpcShaderModuleHelper.cpp +++ b/llpc/util/llpcShaderModuleHelper.cpp @@ -166,18 +166,6 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData break; } } - } else if (decoration == DecorationLocation) { - auto location = (opCode == OpDecorate) ? 
codePos[3] : codePos[4]; - if (location == static_cast(Vkgc::GlCompatibilityInOutLocation::ClipVertex)) - shaderModuleUsage.useClipVertex = true; - if (location == static_cast(Vkgc::GlCompatibilityInOutLocation::FrontColor)) - shaderModuleUsage.useFrontColor = true; - if (location == static_cast(Vkgc::GlCompatibilityInOutLocation::BackColor)) - shaderModuleUsage.useBackColor = true; - if (location == static_cast(Vkgc::GlCompatibilityInOutLocation::FrontSecondaryColor)) - shaderModuleUsage.useFrontSecondaryColor = true; - if (location == static_cast(Vkgc::GlCompatibilityInOutLocation::BackSecondaryColor)) - shaderModuleUsage.useBackSecondaryColor = true; } else if (decoration == DecorationPerVertexKHR) { shaderModuleUsage.useBarycentric = true; } else if (decoration == DecorationIndex) { diff --git a/llvmraytracing/README.md b/llvmraytracing/README.md index 8815fe33f5..0ce2cbed6e 100644 --- a/llvmraytracing/README.md +++ b/llvmraytracing/README.md @@ -7,4 +7,4 @@ This is supposed to be used as a submodule in a driver repository. ### Tests -Lit tests are behind the `check-continuations` CMake target, they can be run with `make check-continuations`. +Lit tests are behind the `check-llvmraytracing` CMake target, they can be run with `make check-llvmraytracing`. 
diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.h b/llvmraytracing/include/lgc/LgcCpsDialect.h index bbd764c215..5fe96b5a5e 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.h +++ b/llvmraytracing/include/lgc/LgcCpsDialect.h @@ -27,6 +27,7 @@ #include "llvm-dialects/Dialect/Builder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/IRBuilder.h" #include #define GET_INCLUDES @@ -41,18 +42,11 @@ class Type; class Value; } // namespace llvm -namespace lgc::cps { -enum class CpsShaderStage : uint8_t { - RayGen = 0, - Traversal, - Intersection, - AnyHit, - ClosestHit, - Miss, - Callable, - Count, -}; +namespace lgc::rt { +enum class RayTracingShaderStage; +} // namespace lgc::rt +namespace lgc::cps { enum class CpsLevel : uint8_t { RayGen = 0, ClosestHit_Miss_Callable, @@ -64,6 +58,15 @@ enum class CpsLevel : uint8_t { constexpr unsigned stackAddrSpace = 32; +// The maximum amount of dwords usable for passing arguments +constexpr unsigned MaxArgumentDwords = 32; + +// The maximum allowed number of payload VGPRs to be used by RT lowering. Sizes +// beyond this value should be spilled to memory. +// TODO: Properly choose a value here, such that the total VGPR number is just +// below an allocation boundary. 
+constexpr unsigned CpsPayloadMaxNumVgprs = MaxArgumentDwords; + unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::Type *type); unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::ArrayRef types); @@ -74,11 +77,15 @@ getRemainingArgumentDwords(const llvm::DataLayout &DL, bool isCpsFunction(const llvm::Function &fn); void setCpsFunctionLevel(llvm::Function &fn, CpsLevel level); CpsLevel getCpsLevelFromFunction(const llvm::Function &fn); -CpsLevel getCpsLevelForShaderStage(CpsShaderStage stage); -uint8_t getPotentialCpsReturnLevels(CpsShaderStage stage); +CpsLevel getCpsLevelForShaderStage(lgc::rt::RayTracingShaderStage stage); +uint8_t getPotentialCpsReturnLevels(lgc::rt::RayTracingShaderStage stage); void pushStateToCpsStack(llvm_dialects::Builder &builder, lgc::cps::JumpOp &jumpOp); llvm::Value *popStateFromCpsStack(llvm_dialects::Builder &builder, const llvm::DataLayout &DL, llvm::Type *stateType); +llvm::Value * +lowerAsContinuationReference(llvm::IRBuilder<> &Builder, + lgc::cps::AsContinuationReferenceOp &AsCROp, + llvm::Value *Relocation = nullptr); } // namespace lgc::cps diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.td b/llvmraytracing/include/lgc/LgcCpsDialect.td index 3da4c56f3d..ce56e6a3b9 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.td +++ b/llvmraytracing/include/lgc/LgcCpsDialect.td @@ -75,7 +75,9 @@ def AwaitOp : LgcCpsOp<"await", [NoUnwind, WillReturn]> { // ===================================================================================================================== def AsContinuationReferenceOp : LgcCpsOp<"as.continuation.reference", [NoUnwind, WillReturn]> { let arguments = (ins PointerType:$fn); - let results = (outs ContinuationReference:$ref); + let results = (outs (or ContinuationReference, I64):$ref); + + let defaultBuilderHasExplicitResultType = true; let summary = "Obtain a continuation reference from a function pointer."; let description = [{ diff --git 
a/llvmraytracing/include/lgc/LgcRtDialect.h b/llvmraytracing/include/lgc/LgcRtDialect.h index 76ab2b2775..0a8b4d5863 100644 --- a/llvmraytracing/include/lgc/LgcRtDialect.h +++ b/llvmraytracing/include/lgc/LgcRtDialect.h @@ -50,7 +50,8 @@ enum class RayTracingShaderStage { Callable, // Not an input shader stage but we need to annotate it as well Traversal, - KernelEntry + KernelEntry, + Count }; // Set shader stage metadata on a LLVM function and erase it by setting @@ -88,7 +89,7 @@ void setShaderArgSize(llvm::Function *func, size_t size); // Get attribute size (in bytes) metadata for a ray-tracing shader // function. -size_t getShaderHitAttributeSize(const llvm::Function *func); +std::optional getShaderHitAttributeSize(const llvm::Function *func); // Set attribute size (in bytes) metadata for a ray-tracing shader // function. diff --git a/llvmraytracing/include/llvmraytracing/Continuations.h b/llvmraytracing/include/llvmraytracing/Continuations.h index 6dadf1f827..a53a01df61 100644 --- a/llvmraytracing/include/llvmraytracing/Continuations.h +++ b/llvmraytracing/include/llvmraytracing/Continuations.h @@ -81,9 +81,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" #include #include @@ -102,10 +100,10 @@ class PassManagerBuilder; class SmallBitVector; struct CoroSplitPass; -// Returns the PAQShaderStage corresponding to the given DXILShaderKind, if -// there is any. +// Returns the PAQShaderStage corresponding to the given RayTracingShaderStage, +// if there is any. std::optional -dxilShaderKindToPAQShaderStage(DXILShaderKind ShaderKind); +rtShaderStageToPAQShaderStage(lgc::rt::RayTracingShaderStage ShaderKind); /// Create a new function, as cloneFunctionHeader, but include types metadata. 
Function *cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, @@ -164,7 +162,8 @@ Value *getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDataTy, /// Replace call to intrinsic (lgc.rt.*) with a call to the driver /// implementation (_cont_*). CallInst *replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, - Value *SystemData, DXILShaderKind Kind, + Value *SystemData, + lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, CompilerUtils::CrossModuleInliner &Inliner); @@ -231,7 +230,8 @@ class LegacyCleanupContinuationsPass class CleanupContinuationsPass : public llvm::PassInfoMixin { public: - CleanupContinuationsPass() {} + CleanupContinuationsPass(bool Use64BitContinuationReferences = false) + : Use64BitContinuationReferences{Use64BitContinuationReferences} {} llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); @@ -265,13 +265,27 @@ class CleanupContinuationsPass void lowerIntrinsicCall(Module &Mod); void lowerGetResumePoint(Module &Mod); - llvm_dialects::Builder *Builder; - Function *ContMalloc; - Function *ContFree; + llvm_dialects::Builder *Builder = nullptr; + Function *ContMalloc = nullptr; + Function *ContFree = nullptr; MapVector ToProcess; uint32_t MaxContStateBytes; - llvm::Module *GpurtLibrary; + llvm::Module *GpurtLibrary = nullptr; + bool Use64BitContinuationReferences; + llvm::Type *ContinuationReferenceType = nullptr; }; + +// Define a wrapper pass that is used for CleanupContinuationsPass creating +// 64-bit lgc.cps.as.continuation.reference ops. 
+class DXILCleanupContinuationsPass : public CleanupContinuationsPass { +public: + DXILCleanupContinuationsPass() : CleanupContinuationsPass(true) {} + + static llvm::StringRef name() { + return "DXIL cleanup continuations pass wrapper"; + } +}; + class LowerRaytracingPipelinePass : public llvm::PassInfoMixin { public: @@ -384,6 +398,10 @@ class DXILCoroSplitPass : public CoroSplitPass { } }; +// Helper function to query whether an instruction is rematerializable, which is +// shared between both DX and Vulkan path. +bool commonMaterializable(Instruction &I); + // Rematerializable callback specific to LgcCps - mainly used to extend what's // considered rematerializable for continuations bool LgcMaterializable(Instruction &I); diff --git a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h index 6a158ea2de..620435a544 100644 --- a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h @@ -34,6 +34,7 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llpc/GpurtEnums.h" +#include "llpc/GpurtVersion.h" #include "llvm-dialects/Dialect/OpMap.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" @@ -89,6 +90,8 @@ constexpr uint32_t CpsArgIdxReturnAddr = 1; constexpr uint32_t CpsArgIdxShaderIndex = 2; constexpr uint32_t CpsArgIdxSystemData = 3; constexpr uint32_t CpsArgIdxHitAttributes = 4; +constexpr uint32_t CpsArgIdxPadding = 5; +constexpr uint32_t CpsArgIdxPayload = 6; struct DxRayIntrinsic { unsigned int Id; @@ -180,6 +183,13 @@ Type *getFuncArgPtrElementType(const Argument *Arg); /// Returns nullptr for non-pointers. Type *getFuncArgPtrElementType(const Function *F, int ArgNo); +struct ContSetting { + /// A hash value that is used as name. + uint64_t NameHash; + /// Value of the setting + uint64_t Value; +}; + // Helper class to access data specific to continuation passes, e.g. // metadata or globals. 
class ContHelper { @@ -314,6 +324,7 @@ class ContHelper { static constexpr const char *MDTypesVoidName = "void"; static constexpr const char *MDContPayloadTyName = "cont.payload.type"; static constexpr const char *MDLgcCpsModuleName = "lgc.cps.module"; + static constexpr const char *MDGpurtSettingsName = "gpurt.settings"; // Global variable names static constexpr const char *GlobalPayloadName = "PAYLOAD"; @@ -337,13 +348,48 @@ class ContHelper { // modules. static void addDxilGpurtLibraryPasses(llvm::ModulePassManager &MPM); + // Get gpurt settings from metadata. + static void getGpurtSettings(const Module &M, + SmallVectorImpl &Settings) { + auto *MD = M.getNamedMetadata(MDGpurtSettingsName); + if (!MD) + return; + auto *Tup = MD->getOperand(0); + + // Stored as {name, value, name, value, ...} + for (auto *Op = Tup->op_begin(); Op != Tup->op_end(); ++Op) { + ContSetting Setting; + Setting.NameHash = mdconst::extract(*Op)->getZExtValue(); + ++Op; + Setting.Value = mdconst::extract(*Op)->getZExtValue(); + Settings.push_back(Setting); + } + }; + + // Store gpurt settings in metadata. + static void setGpurtSettings(Module &M, ArrayRef Settings) { + auto *MD = M.getOrInsertNamedMetadata(MDGpurtSettingsName); + MD->clearOperands(); + auto &Context = M.getContext(); + SmallVector Vals; + IntegerType *Int64Ty = Type::getInt64Ty(Context); + // Stored as {name, value, name, value, ...} + for (auto &Setting : Settings) { + Vals.push_back( + ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.NameHash))); + Vals.push_back( + ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.Value))); + } + MD->addOperand(MDTuple::get(Context, Vals)); + } + // Set metadata specifying the number of outgoing payload registers. static void setOutgoingRegisterCount(Instruction *I, uint32_t RegisterCount) { I->setMetadata(MDRegisterCountName, getI32MDConstant(I->getContext(), RegisterCount)); } - // Get the number of incoming payload registers if set. 
+ // Get the number of outgoing payload registers if set. static std::optional tryGetOutgoingRegisterCount(const Instruction *I) { return extractZExtI32Constant(I->getMetadata(MDRegisterCountName)); @@ -438,10 +484,6 @@ class ContHelper { MD->addOperand(getI32MDConstant(M.getContext(), MaxPayloadRegisterCount)); } - static std::optional tryGetPayloadRegisterCount(const Module &M) { - return tryGetMaxUsedPayloadRegisterCount(M); - } - static void setMaxHitAttributeByteCount(Function &F, uint32_t MaxHitAttributeByteCount) { lgc::rt::setShaderHitAttributeSize(&F, MaxHitAttributeByteCount); @@ -586,19 +628,41 @@ class ContHelper { /// in a shader of the specified kind. /// /// If no shader kind is specified, return false. - static bool - isRematerializableLgcRtOp(CallInst &CInst, - std::optional Kind = std::nullopt); + static bool isRematerializableLgcRtOp( + CallInst &CInst, + std::optional Kind = std::nullopt); static bool isLegacyEntryFunction(Function *Func) { return Func->hasMetadata(MDEntryName); } + + // Given a list of types, get a type that makes the list of types + // occupy a specific number of dwords including it. + static Type *getPaddingType(const DataLayout &DL, LLVMContext &Context, + ArrayRef Types, unsigned TargetNumDwords); + + // Given a list of types, add a type to the list that makes the list of types + // occupy a specific number of dwords. + static void addPaddingType(const DataLayout &DL, LLVMContext &Context, + SmallVectorImpl &Types, + unsigned TargetNumDwords); + + // Given a list of values, add a value to the list that makes the list of + // values occupy a specific number of dwords. + static void addPaddingValue(const DataLayout &DL, LLVMContext &Context, + SmallVectorImpl &Values, + unsigned TargetNumDwords); + + // Returns whether the given flag is enabled in the given GpuRt module, + // using the GpuRt version flags intrinsic. If the intrinsic is not found, + // returns true, enabling new behavior (e.g. for tests). 
+ static bool getGpurtVersionFlag(Module &GpurtModule, GpuRtVersionFlag Flag); }; class ShaderStageHelper final { public: static DXILShaderKind - shaderStageToDxilShaderKind(lgc::rt::RayTracingShaderStage Stage) { + rtShaderStageToDxilShaderKind(lgc::rt::RayTracingShaderStage Stage) { switch (Stage) { case lgc::rt::RayTracingShaderStage::RayGeneration: return DXILShaderKind::RayGeneration; @@ -619,12 +683,13 @@ class ShaderStageHelper final { // eliminate most uses of DXILShaderKind except for initial // conversions to the shared enum. return DXILShaderKind::Compute; + default: + llvm_unreachable("invalid stage!"); } - llvm_unreachable("invalid stage!"); } static std::optional - dxilShaderKindToShaderStage(DXILShaderKind Kind) { + dxilShaderKindToRtShaderStage(DXILShaderKind Kind) { switch (Kind) { case DXILShaderKind::RayGeneration: return lgc::rt::RayTracingShaderStage::RayGeneration; @@ -665,6 +730,7 @@ DRIVER_FUNC_NAME(GetSbtStride) DRIVER_FUNC_NAME(HitKind) DRIVER_FUNC_NAME(Traversal) DRIVER_FUNC_NAME(KernelEntry) +DRIVER_FUNC_NAME(GpurtVersionFlags) #undef DRIVER_FUNC_NAME } // namespace ContDriverFunc @@ -718,15 +784,6 @@ void forEachTerminator(Function *Func, ArrayRef TerminatorOpcodes, } } -// Essentially RAUW for pointers for the case that these use different address -// spaces, rewriting all derived pointers to also use the new address space. -// Writes instructions which are redundant after the replacement into -// the given ToBeRemoved vector. -// The caller has to handle the erasure afterwards. -void replaceAllPointerUses(IRBuilder<> *Builder, Value *OldPointerValue, - Value *NewPointerValue, - SmallVectorImpl &ToBeRemoved); - // Do store-to-load forwarding for memory access to continuation stack. 
This is // helpful to mitigate the issue that coroutine passes in some cases still load // state from the in-memory continuation state when it is still available in SSA @@ -734,10 +791,6 @@ void replaceAllPointerUses(IRBuilder<> *Builder, Value *OldPointerValue, // program that may alias the pointer argument. void forwardContinuationFrameStoreToLoad(DominatorTree &DT, Value *FramePtr); -// Replacement for PointerType::getWithSamePointeeType that works with new LLVM. -// Returns a typed pointer type if the pointer type is typed. -PointerType *getWithSamePointeeType(PointerType *PtrTy, unsigned AddressSpace); - /// Look for the continue call that is dominated by the call to /// GetResumePointAddr. Due to saving the payload before, many basic blocks may /// have been inserted, traverse them while making sure that this diff --git a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h index 87453c3f48..53c8b6ebbf 100644 --- a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h +++ b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h @@ -60,7 +60,11 @@ class CpsStackLowering { BasePointer = llvm::ConstantPointerNull::get(llvm::PointerType::get( llvm::Type::getInt8Ty(Context), LoweredCpsStackAddrSpace)); } - llvm::Function *lowerCpsStackOps(llvm::Function &, llvm::Value *); + llvm::Function *lowerCpsStackOps(llvm::Function *Func, + llvm::Function *GetGlobalMemBase, + bool RequiresIncomingCsp, + llvm::Value *CspStorage = nullptr); + // Get continuation stack size (in bytes). unsigned getStackSizeInBytes() { return StackSizeInBytes; } @@ -73,18 +77,6 @@ class CpsStackLowering { return Layout.getPointerSize(LoweredCpsStackAddrSpace); } - // Register a base pointer in the CpsStackLowering. - // This is used to set the base address when using a stack residing in global - // memory. BasePointer is by default a zero pointer in the - // @LoweredCpsStackAddrSpace. 
During the lowering of load / store - // instructions, a GEP will be constructed that uses the base pointer and the - // corresponding CSP as offset for the source / dest addresses. In case - // @setRealBasePointer never was called, this just creates a pointer out of an - // offset. - void setRealBasePointer(llvm::Value *BasePointer) { - this->BasePointer = BasePointer; - } - static unsigned getContinuationStackAlignment() { return ContinuationStackAlignment; } @@ -106,9 +98,26 @@ class CpsStackLowering { void visitLoad(llvm::LoadInst &); void visitStore(llvm::StoreInst &); llvm::Value *getRealMemoryAddress(llvm::IRBuilder<> &, llvm::Value *); + llvm::Function *addOrInitCsp(llvm::Function *F, + llvm::Function *GetGlobalMemBase, + bool RequiresIncomingCsp); + void visitContinueCalls(llvm::Function *); + void visitContinueCall(llvm::CallInst &); + + // Register a base pointer in the CpsStackLowering. + // This is used to set the base address when using a stack residing in global + // memory. BasePointer is by default a zero pointer in the + // @LoweredCpsStackAddrSpace. During the lowering of load / store + // instructions, a GEP will be constructed that uses the base pointer and the + // corresponding CSP as offset for the source / dest addresses. In case + // @setRealBasePointer never was called, this just creates a pointer out of an + // offset. 
+ void setRealBasePointer(llvm::Value *BasePointer) { + this->BasePointer = BasePointer; + } llvm::Module *Mod; - llvm::AllocaInst *CpsStackAlloca; + llvm::AllocaInst *CpsStackAlloca = nullptr; unsigned LoweredCpsStackAddrSpace; unsigned StackSizeInBytes = 0; llvm::Value *BasePointer = nullptr; diff --git a/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h index cb744d811c..0acfac6051 100644 --- a/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h +++ b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h @@ -886,7 +886,7 @@ class PAQSerializationInfoManager { // getOrCreateCallShaderSerializationInfo depending on ShaderKind. PAQSerializationInfoBase & getOrCreateSerializationInfo(const PAQPayloadConfig &PayloadConfig, - DXILShaderKind ShaderKind); + lgc::rt::RayTracingShaderStage ShaderKind); // Check whether a serialization info for the given // payload type has already been computed (or imported from DXIL metadata). @@ -912,12 +912,14 @@ class PAQSerializationInfoManager { // Convenience wrapper that selects the layout to be used for the payload // incoming to a shader on shader entry. const PAQSerializationLayout &getOrCreateShaderStartSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, DXILShaderKind ShaderKind, + PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy = nullptr); // Convenience wrapper that selects the layout to be used for the payload // outgoing of a shader on shader exit. 
const PAQSerializationLayout &getOrCreateShaderExitSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, DXILShaderKind ShaderKind, + PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy = nullptr, AnyHitExitKind AHExitKind = AnyHitExitKind::None); diff --git a/llvmraytracing/lib/CleanupContinuations.cpp b/llvmraytracing/lib/CleanupContinuations.cpp index 089df84842..faf8030575 100644 --- a/llvmraytracing/lib/CleanupContinuations.cpp +++ b/llvmraytracing/lib/CleanupContinuations.cpp @@ -61,17 +61,14 @@ #include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsDialect.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "llvmraytracing/GpurtContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/MathExtras.h" -#include using namespace llvm; using namespace lgc; @@ -202,7 +199,7 @@ void CleanupContinuationsPass::updateCpsStack(Function *F, Function *NewFunc, SmallVector ToBeRemoved; Value *OldBase = getContinuationFramePtr(F, IsStart, CpsInfo, &ToBeRemoved); - replaceAllPointerUses(Builder, OldBase, CpsStack, ToBeRemoved); + CompilerUtils::replaceAllPointerUses(Builder, OldBase, CpsStack, ToBeRemoved); for (auto *I : reverse(ToBeRemoved)) I->eraseFromParent(); @@ -480,19 +477,31 @@ void CleanupContinuationsPass::handleSingleContinue(ContinuationData &Data, Builder->SetInsertPoint(Call); SmallVector TailArgs; - // %rcr (aka. return continuation reference) for the callee. + uint32_t SkipCount = 2; + // WaitMask and %rcr (aka. return continuation reference) for the callee. 
if (cps::isCpsFunction(*cast(ResumeFun))) { - auto *ResumeCR = Builder->create(ResumeFun); + // Ensure the first argument stays the wait mask. This comes after the CR + // and the levels. + if (ContHelper::isWaitAwaitCall(*Call)) { + TailArgs.push_back(Call->getArgOperand(2)); + ++SkipCount; + } + + auto *ResumeCR = Builder->create( + ContinuationReferenceType, ResumeFun); + TailArgs.push_back(ResumeCR); } else { // For entry-point compute kernel, pass a poison %rcr. TailArgs.push_back(PoisonValue::get(Builder->getInt32Ty())); } - // Skip continuation.reference and levels. - TailArgs.append(SmallVector(drop_begin(Call->args(), 2))); + // Skip continuation.reference, levels and potentially the wait mask. + TailArgs.append(SmallVector(drop_begin(Call->args(), SkipCount))); auto *CR = Call->getArgOperand(0); - Value *Level = Call->getArgOperand(1); + Value *Level = + Call->getArgOperand(ContHelper::isWaitAwaitCall(*Call) ? 2 : 1); unsigned LevelImm = cast(Level)->getZExtValue(); + // TODO: Continuation state are passed through stack for now. auto *State = PoisonValue::get(StructType::get(Builder->getContext(), {})); auto *JumpCall = Builder->create(CR, LevelImm, State, TailArgs); @@ -538,8 +547,6 @@ void CleanupContinuationsPass::lowerIntrinsicCall(Module &Mod) { auto Stage = lgc::rt::getLgcRtShaderStage(Caller); if (!Stage) continue; - DXILShaderKind ShaderKind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); // Signature of cps function: { state, rcr, shader-index, system-data} auto *SystemDataArg = Caller->getArg(CpsArgIdxSystemData); @@ -552,30 +559,36 @@ void CleanupContinuationsPass::lowerIntrinsicCall(Module &Mod) { Builder->CreateStore(SystemDataArg, SystemData); for (auto *Call : IntrinsicCalls) replaceIntrinsicCall(*Builder, SystemDataArg->getType(), SystemData, - ShaderKind, Call, GpurtLibrary ? GpurtLibrary : &Mod, + *Stage, Call, GpurtLibrary ? 
GpurtLibrary : &Mod, CrossInliner); } } void CleanupContinuationsPass::lowerGetResumePoint(Module &Mod) { - auto *GetResumePoint = Mod.getFunction("_AmdGetResumePointAddr"); - if (!GetResumePoint) - return; - - for (auto &Use : make_early_inc_range(GetResumePoint->uses())) { - auto *GetResumeCall = dyn_cast(Use.getUser()); - // Get the lgc.cps.jump that is dominated by this _AmdGetResumePointAddr - // call. - auto JumpCall = findDominatedContinueCall(GetResumeCall); - assert(JumpCall && "Should find a dominated call to lgc.cps.jump"); - Value *ResumeFn = *cast(*JumpCall)->getTail().begin(); - assert(ResumeFn && isa(ResumeFn)); - // We can always move this as.continuation.reference call. - cast(ResumeFn)->moveBefore(GetResumeCall); - Builder->SetInsertPoint(GetResumeCall); - auto *ResumePtr = Builder->CreateZExt(ResumeFn, Builder->getInt64Ty()); - GetResumeCall->replaceAllUsesWith(ResumePtr); - GetResumeCall->eraseFromParent(); + for (auto &F : make_early_inc_range(Mod)) { + auto FuncName = F.getName(); + if (!FuncName.starts_with("_AmdGetResumePointAddr")) + continue; + for (auto &Use : make_early_inc_range(F.uses())) { + auto *GetResumeCall = dyn_cast(Use.getUser()); + // Get the lgc.cps.jump that is dominated by this _AmdGetResumePointAddr + // call. + auto JumpCall = findDominatedContinueCall(GetResumeCall); + assert(JumpCall && "Should find a dominated call to lgc.cps.jump"); + // For wait calls, skip the wait mask. + uint32_t SkipCount = + ContHelper::isWaitAwaitCall(*(JumpCall.value())) ? 1 : 0; + + lgc::cps::JumpOp *Jump = cast(*JumpCall); + Value *ResumeFn = *(Jump->getTail().begin() + SkipCount); + assert(ResumeFn && isa(ResumeFn)); + // We can always move this as.continuation.reference call. 
+ cast(ResumeFn)->moveBefore(GetResumeCall); + Builder->SetInsertPoint(GetResumeCall); + auto *ResumePtr = Builder->CreateZExt(ResumeFn, Builder->getInt64Ty()); + GetResumeCall->replaceAllUsesWith(ResumePtr); + GetResumeCall->eraseFromParent(); + } } } @@ -595,6 +608,12 @@ CleanupContinuationsPass::run(llvm::Module &Mod, llvm_dialects::Builder B(Mod.getContext()); Builder = &B; + + if (Use64BitContinuationReferences) + ContinuationReferenceType = Builder->getInt64Ty(); + else + ContinuationReferenceType = Builder->getInt32Ty(); + // Map the entry function of a continuation to the analysis result for (auto &F : Mod.functions()) { if (F.empty()) diff --git a/llvmraytracing/lib/Continuations.cpp b/llvmraytracing/lib/Continuations.cpp index b5f1d8eaa6..5a476813dc 100644 --- a/llvmraytracing/lib/Continuations.cpp +++ b/llvmraytracing/lib/Continuations.cpp @@ -33,7 +33,6 @@ #include "compilerutils/CompilerUtils.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" -#include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Dialect.h" #include "llvm-dialects/Dialect/OpSet.h" #include "llvmraytracing/ContinuationsDialect.h" @@ -46,6 +45,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/ErrorHandling.h" @@ -95,6 +95,7 @@ const llvm_dialects::OpMap llvm::LgcRtGpuRtMap = {{ GPURTMAP_ENTRY(PrimitiveIndexOp, "PrimitiveIndex", true), GPURTMAP_ENTRY(GeometryIndexOp, "GeometryIndex", true), GPURTMAP_ENTRY(InstanceInclusionMaskOp, "InstanceInclusionMask", false), + GPURTMAP_ENTRY(TriangleVertexPositionsOp, "TriangleVertexPositions", true), }}; #undef GPURTMAP_ENTRY @@ -139,8 +140,8 @@ bool llvm::removeUnusedFunctionDecls(Module *Mod, bool OnlyIntrinsics) { return DidChange; } -bool ContHelper::isRematerializableLgcRtOp(CallInst &CInst, - std::optional Kind) { +bool 
ContHelper::isRematerializableLgcRtOp( + CallInst &CInst, std::optional Kind) { using namespace lgc::rt; Function *Callee = CInst.getCalledFunction(); if (!llvm::isLgcRtOp(Callee)) @@ -158,7 +159,7 @@ bool ContHelper::isRematerializableLgcRtOp(CallInst &CInst, // ClosestHit, because if ClosestHit calls TraceRay or CallShader, that // information is lost from the system data struct. Also exclude rayTCurrent // because ReportHit calls can change that. - if (!Kind || *Kind == DXILShaderKind::Intersection) { + if (!Kind || *Kind == RayTracingShaderStage::Intersection) { static const llvm_dialects::OpSet RematerializableIntersectionDialectOps = llvm_dialects::OpSet::get< InstanceIdOp, InstanceIndexOp, GeometryIndexOp, @@ -172,104 +173,52 @@ bool ContHelper::isRematerializableLgcRtOp(CallInst &CInst, return false; } -void llvm::replaceAllPointerUses(IRBuilder<> *Builder, Value *OldPointerValue, - Value *NewPointerValue, - SmallVectorImpl &ToBeRemoved) { - // Note: The implementation explicitly supports typed pointers, which - // complicates some of the code below. - - // Assert that both types are pointers that only differ in the address space. - PointerType *OldPtrTy = cast(OldPointerValue->getType()); - PointerType *NewPtrTy = cast(NewPointerValue->getType()); - unsigned NewAS = NewPtrTy->getAddressSpace(); - assert(NewAS != OldPtrTy->getAddressSpace()); - assert(getWithSamePointeeType(OldPtrTy, NewAS) == NewPtrTy); - - OldPointerValue->mutateType(NewPtrTy); - - // Traverse through the users and setup the addrspace - SmallVector Worklist(OldPointerValue->users()); - OldPointerValue->replaceAllUsesWith(NewPointerValue); - - // Given a pointer type, get a pointer with the same pointee type (possibly - // opaque) as the given type that uses the NewAS address space. 
- auto GetMutatedPtrTy = [NewAS](Type *Ty) { - PointerType *PtrTy = cast(Ty); - // Support typed pointers: - return getWithSamePointeeType(PtrTy, NewAS); - }; +Type *ContHelper::getPaddingType(const DataLayout &DL, LLVMContext &Context, + ArrayRef Types, + unsigned TargetNumDwords) { + unsigned DwordsOccupied = lgc::cps::getArgumentDwordCount(DL, Types); + + assert(DwordsOccupied <= TargetNumDwords); + unsigned DwordsRemaining = TargetNumDwords - DwordsOccupied; + if (DwordsRemaining > 0) { + auto I32 = Type::getInt32Ty(Context); + return ArrayType::get(I32, DwordsRemaining); + } else { + return StructType::get(Context); + } +} - while (!Worklist.empty()) { - Value *Ptr = Worklist.pop_back_val(); - Instruction *Inst = cast(Ptr); - LLVM_DEBUG(dbgs() << "Visiting " << *Inst << '\n'); - // In the switch below, "break" means to continue with replacing - // the users of the current value, while "continue" means to stop at - // the current value, and proceed with next one from the work list. - switch (Inst->getOpcode()) { - default: - LLVM_DEBUG(Inst->dump()); - llvm_unreachable("Unhandled instruction\n"); - break; - case Instruction::Call: { - if (Inst->isLifetimeStartOrEnd()) { - // The lifetime marker is not useful anymore. - Inst->eraseFromParent(); - } else { - LLVM_DEBUG(Inst->dump()); - llvm_unreachable("Unhandled call instruction\n"); - } - // No further processing needed for the users. - continue; - } - case Instruction::Load: - case Instruction::Store: - // No further processing needed for the users. 
- continue; - case Instruction::And: - case Instruction::Add: - case Instruction::PtrToInt: - break; - case Instruction::BitCast: { - // This can happen with typed pointers - auto *BC = cast(Inst); - assert(cast(Inst)->getSrcTy()->isPointerTy() && - BC->getDestTy()->isPointerTy()); - Inst->mutateType(GetMutatedPtrTy(Inst->getType())); - break; - } - case Instruction::AddrSpaceCast: - // Check that the pointer operand has already been fixed - assert(Inst->getOperand(0)->getType()->getPointerAddressSpace() == NewAS); - // Push the correct users before RAUW. - Worklist.append(Ptr->users().begin(), Ptr->users().end()); - Inst->mutateType(GetMutatedPtrTy(Inst->getType())); - // Since we are mutating the address spaces of users as well, - // we can just use the (already mutated) cast operand. - Inst->replaceAllUsesWith(Inst->getOperand(0)); - ToBeRemoved.push_back(Inst); - continue; - case Instruction::IntToPtr: - case Instruction::GetElementPtr: { - Inst->mutateType(GetMutatedPtrTy(Inst->getType())); - break; - } - case Instruction::Select: { - auto *OldType = Inst->getType(); - if (OldType->isPointerTy()) { - Type *NewType = GetMutatedPtrTy(OldType); - // No further processing if the type has the correct pointer type - if (NewType == OldType) - continue; - - Inst->mutateType(NewType); - } - break; - } - } +void ContHelper::addPaddingType(const DataLayout &DL, LLVMContext &Context, + SmallVectorImpl &Types, + unsigned TargetNumDwords) { + Types.push_back(getPaddingType(DL, Context, Types, TargetNumDwords)); +} + +void ContHelper::addPaddingValue(const DataLayout &DL, LLVMContext &Context, + SmallVectorImpl &Values, + unsigned TargetNumDwords) { + SmallVector Types; + for (auto Value : Values) + Types.push_back(Value->getType()); - Worklist.append(Ptr->users().begin(), Ptr->users().end()); + Values.push_back( + PoisonValue::get(getPaddingType(DL, Context, Types, TargetNumDwords))); +} + +bool ContHelper::getGpurtVersionFlag(Module &GpurtModule, + GpuRtVersionFlag Flag) { 
+ auto *F = GpurtModule.getFunction(ContDriverFunc::GpurtVersionFlagsName); + if (!F) { + // If the GpuRt version flags intrinsic is not found, treat flags as set, + // enabling new behavior. This is mainly intended for tests which lack the + // intrinsic and should always use the new behavior. + return true; } + StructType *RetTy = cast(F->getReturnType()); + assert(RetTy->getNumElements() == 1); + ArrayType *InnerTy = cast(RetTy->getElementType(0)); + uint32_t Flags = InnerTy->getNumElements(); + return (Flags & static_cast(Flag)) != 0; } void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, @@ -356,6 +305,14 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, Worklist.push_back(PointerUse(&UU, PtrUse.Offset)); break; } + + case Instruction::Call: { + auto *Call = cast(U); + // Ignore lifetime markers. + if (Call->isLifetimeStartOrEnd()) + break; + } + LLVM_FALLTHROUGH; default: LLVM_DEBUG(dbgs() << "Unhandled user of continuation frame pointer: " << *U << '\n'); @@ -432,17 +389,6 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, } } -PointerType *llvm::getWithSamePointeeType(PointerType *PtrTy, - unsigned AddressSpace) { -#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 482880 - return PointerType::getWithSamePointeeType(PtrTy, AddressSpace); -#else - // New version of the code (also handles unknown version, which we treat as - // latest) - return PointerType::get(PtrTy->getContext(), AddressSpace); -#endif -} - static const char *toString(DXILShaderKind ShaderKind) { switch (ShaderKind) { case DXILShaderKind::Pixel: @@ -891,11 +837,10 @@ Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, return B.CreateInBoundsGEP(OrigSystemDataTy, SystemData, Indices); } -CallInst * -llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, - Value *SystemData, DXILShaderKind Kind, - CallInst *Call, Module *GpurtLibrary, - CompilerUtils::CrossModuleInliner &Inliner) { +CallInst *llvm::replaceIntrinsicCall( + 
IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, + lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, + CompilerUtils::CrossModuleInliner &Inliner) { B.SetInsertPoint(Call); auto IntrImplEntry = findIntrImplEntryByIntrinsicCall(Call); @@ -917,8 +862,8 @@ llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, // For hit data accessors, get the hit data struct if (IntrImplEntry->AccessesHitData) { Function *GetHitData; - if (Kind == DXILShaderKind::AnyHit || - Kind == DXILShaderKind::Intersection) { + if (Kind == lgc::rt::RayTracingShaderStage::AnyHit || + Kind == lgc::rt::RayTracingShaderStage::Intersection) { auto *GetCandidateState = GpurtLibrary->getFunction(ContDriverFunc::GetCandidateStateName); assert(GetCandidateState && "Could not find GetCandidateState function"); @@ -984,10 +929,21 @@ llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, } auto *NewCall = B.CreateCall(IntrImpl, Arguments); + Value *Replacement = NewCall; + if (isa(Call)) { + // Special handling for TriangleVertexPositionsOp + // GPURT returns { <3 x float>, <3 x float>, <3 x float> }, but shader + // requires [3 x <3 x float>]. 
+ Replacement = PoisonValue::get(Call->getType()); + for (unsigned i = 0; i < 3; i++) { + Replacement = + B.CreateInsertValue(Replacement, B.CreateExtractValue(NewCall, i), i); + } + } LLVM_DEBUG(dbgs() << "Replacing " << *Call << " by " << *NewCall << "\n"); if (!Call->getType()->isVoidTy()) - Call->replaceAllUsesWith(NewCall); + Call->replaceAllUsesWith(Replacement); Inliner.inlineCall(*NewCall); B.SetInsertPoint(&*B.GetInsertPoint()); Call->eraseFromParent(); @@ -1012,7 +968,7 @@ static void replaceEnqueueIntrinsic(Function &F, Function *NewFunc) { } B.CreateCall(NewFunc, Args); - CInst->eraseFromParent(); + CompilerUtils::createUnreachable(B); } } } @@ -1068,11 +1024,59 @@ static void handleGetUninitialized(Function &Func) { auto *ArgTy = Func.getReturnType(); auto *Poison = PoisonValue::get(ArgTy); llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { - CInst.replaceAllUsesWith(Poison); + IRBuilder<> B(&CInst); + // Create a frozen poison value so poison doesn't propagate into + // dependent values, e.g. when bitpacking the uninitialized value into + // a bitfield that should not be invalidated. 
+ Value *Freeze = B.CreateFreeze(Poison); + CInst.replaceAllUsesWith(Freeze); CInst.eraseFromParent(); }); } +static void handleGetSetting(Function &F, ArrayRef Settings) { + auto *Ty = dyn_cast(F.getReturnType()); + if (!Ty) + report_fatal_error(Twine("Only integer settings are supported but '") + + F.getName() + "' does not return an integer"); + auto Name = F.getName(); + bool Consumed = Name.consume_front("_AmdGetSetting_"); + if (!Consumed) + report_fatal_error(Twine("Setting intrinsic needs to start with " + "'_AmdGetSetting_' but is called '") + + Name + "'"); + + uint64_t NameVal; + bool Failed = Name.getAsInteger(10, NameVal); + if (Failed) { + report_fatal_error( + Twine("Failed to parse _AmdGetSetting_ suffix as int: ") + Name); + } + + uint64_t Value = 0; + bool Found = false; + for (auto &Setting : Settings) { + if (Setting.NameHash == NameVal) { + Value = Setting.Value; + Found = true; + break; + } + } + if (!Found) { +#ifndef NDEBUG + errs() << Twine("Warning: Setting '") + Name + + "' is not defined, setting to 0\n"; +#endif + } + + auto *Val = ConstantInt::get(Ty, Value); + + forEachCall(F, [&](CallInst &Call) { + Call.replaceAllUsesWith(Val); + Call.eraseFromParent(); + }); +} + void llvm::terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall) { Builder.SetInsertPoint(CompleteCall); @@ -1116,6 +1120,8 @@ bool llvm::earlyDriverTransform(Module &M) { // Import from metadata if set auto RtipLevel = ContHelper::tryGetRtip(M); auto Flags = ContHelper::tryGetFlags(M); + SmallVector GpurtSettings; + ContHelper::getGpurtSettings(M, GpurtSettings); bool Changed = false; // Replace Enqueue and Complete intrinsics @@ -1150,6 +1156,9 @@ bool llvm::earlyDriverTransform(Module &M) { } else if (Name.starts_with("_AmdGetUninitialized")) { Changed = true; handleGetUninitialized(F); + } else if (Name.starts_with("_AmdGetSetting")) { + Changed = true; + handleGetSetting(F, GpurtSettings); } } @@ -1184,6 +1193,23 @@ bool defaultMaterializable(Instruction &V); 
} // End namespace coro } // End namespace llvm +bool llvm::commonMaterializable(Instruction &Inst) { + if (coro::defaultMaterializable(Inst)) + return true; + + // Insert into constant. + if (isa(Inst) && + isa(Inst.getOperand(0))) { + return true; + } + + if (auto *Shuffle = dyn_cast(&Inst); + Shuffle && Shuffle->isSingleSource()) + return true; + + return false; +} + bool llvm::LgcMaterializable(Instruction &OrigI) { Instruction *V = &OrigI; @@ -1203,15 +1229,9 @@ bool llvm::LgcMaterializable(Instruction &OrigI) { break; } - if (coro::defaultMaterializable(*V)) + if (commonMaterializable(*V)) return true; - // Insert into constant. - if (isa(V) && - isa(V->getOperand(0))) { - return true; - } - if (auto *LI = dyn_cast(V)) { // load from constant address space if (LI->getPointerAddressSpace() == 4) diff --git a/llvmraytracing/lib/CpsStackLowering.cpp b/llvmraytracing/lib/CpsStackLowering.cpp index 921cd2fd12..ccb9a7c903 100644 --- a/llvmraytracing/lib/CpsStackLowering.cpp +++ b/llvmraytracing/lib/CpsStackLowering.cpp @@ -24,9 +24,13 @@ **********************************************************************************************************************/ #include "llvmraytracing/CpsStackLowering.h" +#include "compilerutils/CompilerUtils.h" #include "lgc/LgcCpsDialect.h" +#include "lgc/LgcRtDialect.h" +#include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -49,24 +53,32 @@ CpsStackLowering::convertStackPtrToI32(TypeLowering &TypeLower, Type *Ty) { } // ===================================================================================================================== -// Lower continuation stack operations in the function -// -// @param Function : the function to be processed +// @param Func : the function to be processed +// @param GetGlobalMemBase: Get 
the base address for the stack. +// `nullptr` if there is no base address and the csp +// can be converted with ptrtoint. +// @param RequiresIncomingCsp: Whether the CSP argument should be appended to +// Func's signature. // @param CpsStorage : the alloca used for the holding the latest continuation -// stack pointer +// stack pointer. TODO Remove this argument. This function +// should be responsible for adding the alloca. // @return: The new function, if Function was mutated, or the Function argument. -Function *CpsStackLowering::lowerCpsStackOps(Function &Function, - Value *CpsStorage) { - assert(cast(CpsStorage)->getAllocatedType()->isIntegerTy()); - - Mod = Function.getParent(); +Function *CpsStackLowering::lowerCpsStackOps(Function *Func, + Function *GetGlobalMemBase, + bool RequiresIncomingCsp, + llvm::Value *CspStorage) { + Mod = Func->getParent(); StackSizeInBytes = 0; - CpsStackAlloca = cast(CpsStorage); + + if (CspStorage) + CpsStackAlloca = cast(CspStorage); + else + Func = addOrInitCsp(Func, GetGlobalMemBase, RequiresIncomingCsp); + TypeLower.addRule(std::bind(&CpsStackLowering::convertStackPtrToI32, this, std::placeholders::_1, std::placeholders::_2)); - auto *NewFunc = &Function; - if (lgc::cps::isCpsFunction(Function)) - NewFunc = TypeLower.lowerFunctionArguments(Function); + if (lgc::cps::isCpsFunction(*Func)) + Func = TypeLower.lowerFunctionArguments(*Func); static const auto Visitor = llvm_dialects::VisitorBuilder() .nest(&TypeLowering::registerVisitors) @@ -82,11 +94,15 @@ Function *CpsStackLowering::lowerCpsStackOps(Function &Function, .add(&CpsStackLowering::visitLoad) .add(&CpsStackLowering::visitStore) .build(); - Visitor.visit(*this, *NewFunc); + Visitor.visit(*this, *Func); TypeLower.finishPhis(); TypeLower.finishCleanup(); - return NewFunc; + visitContinueCalls(Func); + + CpsStackAlloca = nullptr; + + return Func; } // ===================================================================================================================== @@ 
-169,6 +185,52 @@ void CpsStackLowering::visitStore(llvm::StoreInst &Store) { Store.replaceUsesOfWith(Store.getPointerOperand(), Values[0]); } +// ===================================================================================================================== +// Add stack pointer to continue calls +// +// @param Func: the function where stack pointers should be added to continue +// calls +void CpsStackLowering::visitContinueCalls(llvm::Function *Func) { + llvm::forEachTerminator(Func, {Instruction::Unreachable, Instruction::Ret}, + [&](Instruction &Terminator) { + auto *BB = Terminator.getParent(); + if (&Terminator != &*BB->begin()) { + auto Before = --Terminator.getIterator(); + if (auto *CInst = dyn_cast(Before)) { + if (auto *Func = CInst->getCalledFunction()) { + auto Name = Func->getName(); + if (Name == "continuation.continue" || + Name == "continuation.waitContinue") + visitContinueCall(*CInst); + } + } + } + }); +} + +// ===================================================================================================================== +// Add stack pointer to continue call +// +// @param CInst: the continue call +void CpsStackLowering::visitContinueCall(llvm::CallInst &CInst) { + auto *Func = CInst.getCalledFunction(); + auto Name = Func->getName(); + SmallVector NewCallArgs{CInst.args()}; + IRBuilder<> Builder(&CInst); + + // If the function does not use the stack, pass-through the CSP argument. + Value *Csp = Builder.CreateLoad(Builder.getInt32Ty(), CpsStackAlloca); + + bool IsWaitContinue = Name.contains("waitContinue"); + const size_t CspInsertIndex = IsWaitContinue ? 
2 : 1; + NewCallArgs.insert(NewCallArgs.begin() + CspInsertIndex, Csp); + + auto *NewCall = Builder.CreateCall(Func, NewCallArgs); + CInst.replaceAllUsesWith(NewCall); + NewCall->copyMetadata(CInst); + CInst.eraseFromParent(); +} + // ===================================================================================================================== // Lower ptrtoint instruction // @@ -333,3 +395,75 @@ Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, return Builder.CreateGEP(I8, GepBase, {GepIndex}); } + +// ===================================================================================================================== +// Add stack pointer argument to the function or initialize the stack pointer +// from the initializer. +// +// @param GetGlobalMemBase: Get the base address for the stack. +// `nullptr` if there is no base address and the csp +// can be converted with ptrtoint. +Function *CpsStackLowering::addOrInitCsp(Function *F, + Function *GetGlobalMemBase, + bool RequiresIncomingCsp) { + CompilerUtils::CrossModuleInliner CrossInliner; + auto &GpurtContext = lgc::GpurtContext::get(Mod->getContext()); + auto &GpurtLibrary = GpurtContext.theModule ? *GpurtContext.theModule : *Mod; + IRBuilder<> Builder(F->getContext()); + Value *Initializer = nullptr; + + Builder.SetInsertPointPastAllocas(F); + CpsStackAlloca = Builder.CreateAlloca(Builder.getInt32Ty()); + CpsStackAlloca->setName("csp"); + + if (RequiresIncomingCsp) { + auto *FTy = F->getFunctionType(); + SmallVector NewArgTys{FTy->params()}; + + const size_t CspArgIndex = lgc::cps::isCpsFunction(*F) ? 
1 : 0; + NewArgTys.insert(NewArgTys.begin() + CspArgIndex, Builder.getInt32Ty()); + + Function *NewFunc = CompilerUtils::mutateFunctionArguments( + *F, F->getReturnType(), NewArgTys, F->getAttributes()); + + Argument *CspArg = NewFunc->getArg(CspArgIndex); + CspArg->setName("cspInit"); + Initializer = CspArg; + + for (unsigned Idx = 0; Idx < F->arg_size(); ++Idx) { + // Skip the CSP argument during remapping. + Value *OldArg = F->getArg(Idx); + Value *NewArg = NewFunc->getArg(Idx >= CspArgIndex ? Idx + 1 : Idx); + NewArg->takeName(OldArg); + OldArg->replaceAllUsesWith(NewArg); + } + + F->replaceAllUsesWith(NewFunc); + F->eraseFromParent(); + + F = NewFunc; + } else if (lgc::rt::getLgcRtShaderStage(F) != + lgc::rt::RayTracingShaderStage::KernelEntry) { + // Init csp through intrinsic + auto *InitFun = + GpurtLibrary.getFunction(ContDriverFunc::GetContinuationStackAddrName); + assert(InitFun && "_cont_GetContinuationStackAddr not found."); + assert(InitFun->arg_size() == 0 && + InitFun->getReturnType()->isIntegerTy(32)); + + Initializer = CrossInliner.inlineCall(Builder, InitFun).returnValue; + } + + if (Initializer) + Builder.CreateStore(Initializer, CpsStackAlloca); + + // Get the global memory base address. 
+ if (GetGlobalMemBase) { + auto *Base = CrossInliner.inlineCall(Builder, GetGlobalMemBase).returnValue; + auto *CspTy = + Builder.getInt8Ty()->getPointerTo(getLoweredCpsStackAddrSpace()); + setRealBasePointer(Builder.CreateIntToPtr(Base, CspTy)); + } + + return F; +} diff --git a/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp index d58d6711b4..98ba8a98fe 100644 --- a/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp +++ b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp @@ -38,10 +38,8 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include #include @@ -156,9 +154,11 @@ static bool isUtilFunction(StringRef Name) { "GetLocalRootIndex", "GetResumePointAddr", "GetRtip", + "GetSetting", "GetShaderKind", "GetTriangleHitAttributes", "GetUninitialized", + "GpurtVersionFlags", "I32Count", "IsEndSearch", "KernelEntry", diff --git a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp index d5a9cc97ed..654738ffcc 100644 --- a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp +++ b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp @@ -504,7 +504,7 @@ bool DXILContLgcRtOpConverterPass::prepareEntryPointShaders() { analyzeShaderKinds(*M, ShaderKinds); for (auto &[Func, Kind] : ShaderKinds) { - auto Stage = ShaderStageHelper::dxilShaderKindToShaderStage(Kind); + auto Stage = ShaderStageHelper::dxilShaderKindToRtShaderStage(Kind); // Ignore non-raytracing shader stages if (!Stage.has_value()) diff --git a/llvmraytracing/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp index cc972ef0ab..ffce304fdc 100644 --- a/llvmraytracing/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -39,8 +39,8 @@ #include "compilerutils/CompilerUtils.h" #include "lgc/LgcCpsDialect.h" 
#include "lgc/LgcRtDialect.h" +#include "llpc/GpurtEnums.h" #include "llvm-dialects/Dialect/Builder.h" -#include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsDialect.h" #include "llvmraytracing/ContinuationsUtil.h" @@ -55,13 +55,12 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/Error.h" #include using namespace llvm; @@ -106,16 +105,10 @@ class DXILContPostProcessPassImpl final { bool IsStart = true; Type *SystemDataTy = nullptr; unsigned SystemDataArgumentIndex = std::numeric_limits::max(); - - Value *CspStorage = nullptr; - Value *CspInitializerArg = nullptr; - Value *BasePointer = nullptr; }; private: - bool addIncomingCsp(); void lowerGetResumePointAddr(Function &F); - bool passOutgoingCsp(); void handleRegisterBufferGetPointer(Function &F, GlobalVariable *Payload); void handleValueI32Count(Function &F); void handleValueGetI32(Function &F); @@ -134,6 +127,7 @@ class DXILContPostProcessPassImpl final { bool handleIntrinsicCalls(llvm::ModuleAnalysisManager &AnalysisManager); bool replaceIntrinsicCallsAndSetupRayGen(); bool lowerCpsOps(); + void lowerJumpOp(lgc::cps::JumpOp &JumpOp); bool unfoldGlobals(); bool handleAmdInternals(); @@ -150,7 +144,7 @@ class DXILContPostProcessPassImpl final { // For performance reasons, we keep this list of continuation.{wait}Continue // calls here and update it when required. 
SmallVector ContinueCalls; - Function *GlobalMemBase = nullptr; + Function *GetGlobalMemBase = nullptr; }; // Collects all calls to continuation.[wait]continue @@ -196,7 +190,7 @@ static void reportContStateSizes(Module &M) { continue; DXILShaderKind ShaderKind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); dbgs() << "Continuation state size of \"" << F.getName() << "\" (" << ShaderKind << "): " << OptStateSize.value() << " bytes\n"; } @@ -221,7 +215,7 @@ static void reportPayloadSizes(Module &M, ArrayRef ContinueCalls) { continue; DXILShaderKind ShaderKind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); auto OptIncomingPayloadRegisterCount = ContHelper::tryGetIncomingRegisterCount(&F); bool HasIncomingPayload = OptIncomingPayloadRegisterCount.has_value(); @@ -310,9 +304,7 @@ static bool addGetAddrAndMDIntrinsicCalls(Module &M) { for (auto &F : M.functions()) { // Speed-up: Skip F if it cannot be used as pointer, e.g. dx intrinsics. - // Skip CPS functions here as well since they use - // lgc.cps.as.continuation.reference instead of getAddrAndMD. 
- if (!canBeUsedAsPtr(F) || lgc::cps::isCpsFunction(F)) + if (!canBeUsedAsPtr(F)) continue; CEWorkList.clear(); @@ -336,8 +328,10 @@ static bool addGetAddrAndMDIntrinsicCalls(Module &M) { while (!CEWorkList.empty()) { auto *CE = CEWorkList.pop_back_val(); - assert((isa(CE)) && - "Unexpected use of function!"); + assert( + (isa( + CE)) && + "Unexpected use of function!"); // Copy the users of CE into a local SmallVector before traversing it, // because we are going to add new users of CE that we do *not* want to @@ -361,8 +355,9 @@ static bool addGetAddrAndMDIntrinsicCalls(Module &M) { assert(CE->getType() == Type::getInt64Ty(M.getContext()) && "Function use should be as an i64!"); B.SetInsertPoint(I); - auto *AddrWithMD = - B.CreateCall(GetAddrAndMD, {B.CreatePtrToInt(CE, B.getInt64Ty())}); + + auto *AddrWithMD = B.CreateCall(GetAddrAndMD, {CE}); + // Can't RAUW because the CE might be used by different instructions. // Instead, manually replace the instruction's operand. [[maybe_unused]] bool Found = false; @@ -404,6 +399,19 @@ checkContinuationsModule(const Module &M, "Found a function with more than one setLocalRootIndex"); }); } + + // Check that resume functions do not have a stack size set. + for (auto &Func : M) { + if (auto *MD = dyn_cast_or_null( + Func.getMetadata(ContHelper::MDContinuationName))) { + auto *StartFunc = extractFunctionOrNull(MD->getOperand(0)); + bool IsStart = (&Func == StartFunc); + bool HasStackSizeMetadata = + ContHelper::tryGetStackSize(&Func).has_value(); + if (!IsStart && HasStackSizeMetadata) + report_fatal_error("Found resume function with stack size metadata!"); + } + } } /// Replace a global with a part of another global. 
@@ -426,137 +434,6 @@ static void replaceGlobal(const DataLayout &DL, GlobalVariable *Registers, G->eraseFromParent(); } -bool DXILContPostProcessPassImpl::addIncomingCsp() { - SmallVector Candidates; - - for (Function &Func : *Mod) { - if (Func.isDeclaration()) - continue; - - if (Func.hasMetadata(ContHelper::MDContinuationName)) { - Candidates.push_back(&Func); - continue; - } - - if (lgc::cps::isCpsFunction(Func)) { - Candidates.push_back(&Func); - continue; - } - } - - SmallVector> MappedFuncs; - for (auto &F : Candidates) { - Function *Func = F; - - Value *Initializer = nullptr; - Builder.SetInsertPointPastAllocas(Func); - - Value *Csp = Builder.CreateAlloca(Builder.getInt32Ty()); - Csp->setName("csp"); - - // Do an early lookup to avoid cluttering the code with conditional lookups. - // This will only be abandoned if F is cloned. - // Store a pointer to the function data for convenience reasons. - auto FuncIt = ToProcess.find(F); - FunctionData *FuncData = - FuncIt != ToProcess.end() ? &FuncIt->second : nullptr; - - if (!ContHelper::isLegacyEntryFunction(F)) { - auto *FTy = F->getFunctionType(); - SmallVector NewArgTys{FTy->params()}; - - const size_t CspArgIndex = lgc::cps::isCpsFunction(*F) ? 1 : 0; - NewArgTys.insert(NewArgTys.begin() + CspArgIndex, Builder.getInt32Ty()); - - Function *NewFunc = CompilerUtils::mutateFunctionArguments( - *Func, Func->getReturnType(), NewArgTys, Func->getAttributes()); - - Argument *CspArg = NewFunc->getArg(CspArgIndex); - CspArg->setName("cspInit"); - Initializer = CspArg; - - MappedFuncs.push_back({Func, NewFunc}); - - for (unsigned Idx = 0; Idx < Func->arg_size(); ++Idx) { - // Skip the CSP argument during remapping. - Value *OldArg = Func->getArg(Idx); - Value *NewArg = NewFunc->getArg(Idx >= CspArgIndex ? Idx + 1 : Idx); - NewArg->takeName(OldArg); - OldArg->replaceAllUsesWith(NewArg); - } - - // Finally, update the function pointer so we operate on the newly created - // function. 
- Func = NewFunc; - - // Do some bookkeeping to avoid issues with iterator invalidation possibly - // caused by inserting NewFunc into ToProcess. - - // If the function data exists, insert a new element, try to move the - // contents and return a pointer to the new space. If this invalidates the - // iterator, a new iterator is returned. Otherwise, just return a pointer - // to the possibly newly allocated storage. - if (FuncData) - FuncData = - &ToProcess.insert({NewFunc, std::move(*FuncData)}).first->second; - else - FuncData = &ToProcess.insert({NewFunc, {}}).first->second; - - FuncData->CspInitializerArg = CspArg; - } else { - // Init csp through intrinsic - auto *InitFun = GpurtLibrary->getFunction( - ContDriverFunc::GetContinuationStackAddrName); - assert(InitFun && "DXILContPostProcessPassImpl::addIncomingCsp: " - "_cont_GetContinuationStackAddr not found."); - assert(InitFun->arg_size() == 0 && - InitFun->getReturnType()->isIntegerTy(32)); - - Initializer = CrossInliner.inlineCall(Builder, InitFun).returnValue; - } - - Builder.CreateStore(Initializer, Csp); - - assert(FuncData && "DXILContPostProcessPassImpl::addIncomingCsp: Expected " - "FuncData to point to existing storage!"); - FuncData->CspStorage = Csp; - - // Store the global memory base address. - if (StackAddrspace == ContStackAddrspace::Global) { - assert(GlobalMemBase && "DXILContPostProcessPassImpl::addIncomingCsp: " - "GlobalMemBase cannot be nullptr!"); - - auto *Base = CrossInliner.inlineCall(Builder, GlobalMemBase).returnValue; - auto *CspTy = Builder.getInt8Ty()->getPointerTo( - StackLowering->getLoweredCpsStackAddrSpace()); - FuncData->BasePointer = Builder.CreateIntToPtr(Base, CspTy); - } - } - - // Replace references to the old function with references to the new (mapped) - // function. 
- while (!MappedFuncs.empty()) { - auto [OldFunc, NewFunc] = MappedFuncs.pop_back_val(); - for (User *U : make_early_inc_range(OldFunc->users())) { - if (auto *AsCRUser = dyn_cast(U)) { - Builder.SetInsertPoint(AsCRUser); - auto *NewAsCROp = - Builder.create(NewFunc); - U->replaceAllUsesWith(NewAsCROp); - AsCRUser->eraseFromParent(); - } - } - - OldFunc->replaceAllUsesWith( - ConstantExpr::getBitCast(NewFunc, OldFunc->getType())); - OldFunc->eraseFromParent(); - - ToProcess.erase(OldFunc); - } - - return !Candidates.empty(); -} - void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { auto *GetResumePointAddr = &F; @@ -667,68 +544,6 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { } } -// Append the CSP argument to all continuation.continue and -// continuation.waitContinue calls. -bool DXILContPostProcessPassImpl::passOutgoingCsp() { - bool Changed = false; - - collectContinueCalls(*Mod, ContinueCalls); - - Function *ContContinueFunc = Mod->getFunction("continuation.continue"); - Function *ContWaitContinueFunc = - Mod->getFunction("continuation.waitContinue"); - - SmallVector NewContinueCalls; - NewContinueCalls.reserve(ContinueCalls.size()); - - for (auto *CInst : make_early_inc_range(ContinueCalls)) { - Function *Parent = CInst->getFunction(); - - auto Data = ToProcess.find(Parent); - if (Data == ToProcess.end()) { - LLVM_DEBUG( - dbgs() - << "DXILContPostProcessPassImpl::passOutgoingCsp: Did not find " - "function data for function " - << Parent->getName() << "!"); - continue; - } - - Value *CspStorage = Data->second.CspStorage; - - SmallVector NewCallArgs{CInst->args()}; - Builder.SetInsertPoint(CInst); - - // If the function does not use the stack, pass-through the CSP argument. 
- Value *Csp = nullptr; - - if (!CspStorage) - Csp = Data->second.CspInitializerArg; - else - Csp = Builder.CreateLoad(Builder.getInt32Ty(), CspStorage); - - bool IsWaitContinue = - CInst->getCalledFunction()->getName().contains("waitContinue"); - const size_t CspInsertIndex = IsWaitContinue ? 2 : 1; - NewCallArgs.insert(NewCallArgs.begin() + CspInsertIndex, Csp); - - auto *NewCall = Builder.CreateCall( - IsWaitContinue ? ContWaitContinueFunc : ContContinueFunc, NewCallArgs); - CInst->replaceAllUsesWith(NewCall); - NewCall->copyMetadata(*CInst); - CInst->eraseFromParent(); - - NewContinueCalls.push_back(NewCall); - - Changed = true; - } - - // The list of continue calls is now final. - ContinueCalls = std::move(NewContinueCalls); - - return Changed; -} - void DXILContPostProcessPassImpl::handleRegisterBufferGetPointer( Function &F, GlobalVariable *Payload) { // Check calls that take the payload as argument @@ -992,6 +807,19 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { if (!Stage) continue; + // For the kernel entry function in GPURT, we only care about its existence + // in @ToProcess, since we only want to create an alloca for the + // continuation stack pointer later (and do the lgc.cps lowering). 
+ if (lgc::rt::getLgcRtShaderStage(&F) == + lgc::rt::RayTracingShaderStage::KernelEntry) { + FunctionData Data; + Data.Kind = DXILShaderKind::Compute; + [[maybe_unused]] bool DidInsert = + ToProcess.insert({&F, std::move(Data)}).second; + assert(DidInsert); + continue; + } + // Handle entry functions first if (auto *MD = dyn_cast_or_null( F.getMetadata(ContHelper::MDContinuationName))) { @@ -1003,7 +831,7 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { } DXILShaderKind Kind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); const bool IsCpsFunction = lgc::cps::isCpsFunction(F); switch (Kind) { @@ -1145,8 +973,10 @@ bool DXILContPostProcessPassImpl::replaceIntrinsicCalls( Builder.CreateStore(SystemDataArgument, SystemDataPtr); for (auto *Call : Data.IntrinsicCalls) - replaceIntrinsicCall(Builder, Data.SystemDataTy, SystemDataPtr, Data.Kind, - Call, GpurtLibrary, CrossInliner); + replaceIntrinsicCall( + Builder, Data.SystemDataTy, SystemDataPtr, + ShaderStageHelper::dxilShaderKindToRtShaderStage(Data.Kind).value(), + Call, GpurtLibrary, CrossInliner); return true; } @@ -1222,42 +1052,110 @@ bool DXILContPostProcessPassImpl::replaceIntrinsicCallsAndSetupRayGen() { // Entry point for all lgc.cps lowering. // bool DXILContPostProcessPassImpl::lowerCpsOps() { - SmallVector CpsFuncs; - bool Changed = false; + struct CpsVisitorState { + DXILContPostProcessPassImpl &Self; + bool &Changed; + llvm_dialects::Builder &Builder; + }; + + // Note: It is a bit unlucky that we are using both a visitor for + // lgc.cps.as.continuation.reference and lgc.cps.jump and a loop for the + // actual stack lowering. It would be nice to use a visitor for both of them, + // but currently, there seems to be no support in dialects for marrying both + // approaches: we would need a visitor that supports visiting function + // definitions as well. 
+ static const auto CpsVisitor = + llvm_dialects::VisitorBuilder() + .add( + [](CpsVisitorState &State, + lgc::cps::AsContinuationReferenceOp &AsCrOp) { + Value *LoweredReference = + lgc::cps::lowerAsContinuationReference(State.Builder, + AsCrOp); + AsCrOp.replaceAllUsesWith(LoweredReference); + AsCrOp.eraseFromParent(); + State.Changed = true; + }) + .add( + [](CpsVisitorState &State, lgc::cps::JumpOp &JumpOp) { + State.Self.lowerJumpOp(JumpOp); + State.Changed = true; + }) + .build(); + + CpsVisitorState State{*this, Changed, Builder}; + + struct CspCandidateInfo { + bool RequiresCspArgument = false; + Function *Func = nullptr; + }; + + SmallVector CandidateInfo; + for (Function &Func : *Mod) { if (Func.isDeclaration()) continue; - auto FuncData = ToProcess.find(&Func); - Value *CspStorage = nullptr; - if (FuncData != ToProcess.end()) - CspStorage = FuncData->second.CspStorage; + if (Func.hasMetadata(ContHelper::MDContinuationName)) { + CandidateInfo.push_back( + {!ContHelper::isLegacyEntryFunction(&Func), &Func}); + continue; + } - if (!CspStorage) { - LLVM_DEBUG(dbgs() << "DXILContPostProcessPassImpl::lowerCpsOps: Did not " - "find the CSP storage alloca for " - << Func.getName() << ".\n"); + if (lgc::rt::getLgcRtShaderStage(&Func) == + lgc::rt::RayTracingShaderStage::KernelEntry) { + CandidateInfo.push_back({false, &Func}); continue; } - // Do the actual stack lowering. - if (*StackAddrspace == ContStackAddrspace::Global) { - // Ensure loads and stores are getting mapped to global memory (by adding - // the global memory base address). 
- assert(FuncData->second.BasePointer && - "DXILContPostProcessPassImpl::lowerCpsOps: Requested access to " - "global memory but no base pointer provided!"); - StackLowering->setRealBasePointer(FuncData->second.BasePointer); + if (lgc::cps::isCpsFunction(Func)) { + CandidateInfo.push_back( + {!ContHelper::isLegacyEntryFunction(&Func), &Func}); + continue; } + } + + for (auto &[RequiresCspArgument, F] : CandidateInfo) { + // Lower lgc.cps.jump and lgc.cps.as.continuation.reference ops. + CpsVisitor.visit(State, *F); + + auto Data = std::move(ToProcess[F]); + ToProcess.erase(F); - StackLowering->lowerCpsStackOps(Func, CspStorage); + auto *NewFunc = StackLowering->lowerCpsStackOps(F, GetGlobalMemBase, + RequiresCspArgument); + + ToProcess.insert({NewFunc, Data}); } + collectContinueCalls(*Mod, ContinueCalls); + return Changed; } +void DXILContPostProcessPassImpl::lowerJumpOp(lgc::cps::JumpOp &JumpOp) { + Builder.SetInsertPoint(&JumpOp); + Value *RCR = JumpOp.getTarget(); + + Function *Continue = ContHelper::isWaitAwaitCall(JumpOp) + ? llvm::getContinuationWaitContinue(*Mod) + : llvm::getContinuationContinue(*Mod); + + SmallVector Args; + Args.push_back(Builder.CreateZExt(RCR, Builder.getInt64Ty())); + + // If this is a wait call, then the wait mask is at the start of the tail + // argument list. 
+ Args.append(JumpOp.getTail().begin(), JumpOp.getTail().end()); + + CallInst *ContinueCall = Builder.CreateCall(Continue, Args); + ContinueCall->copyMetadata(JumpOp); + ContHelper::removeIsWaitAwaitMetadata(*ContinueCall); + JumpOp.eraseFromParent(); +} + bool DXILContPostProcessPassImpl::unfoldGlobals() { // Replace register globals with indices into a bigger global const auto &DL = Mod->getDataLayout(); @@ -1335,7 +1233,7 @@ bool DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { static_cast(StackAddrspace.value())); if (*StackAddrspace == ContStackAddrspace::Global) - GlobalMemBase = getContinuationStackGlobalMemBase(*GpurtLibrary); + GetGlobalMemBase = getContinuationStackGlobalMemBase(*GpurtLibrary); initializeProcessableFunctionData(); @@ -1344,7 +1242,6 @@ bool DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { Changed |= handleAmdInternals(); Changed |= handleIntrinsicCalls(AnalysisManager); Changed |= replaceIntrinsicCallsAndSetupRayGen(); - Changed |= addIncomingCsp(); for (auto &F : make_early_inc_range(*Mod)) { auto FuncName = F.getName(); @@ -1359,7 +1256,6 @@ bool DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { } } - Changed |= passOutgoingCsp(); Changed |= lowerCpsOps(); Changed |= fixupDxilMetadata(*Mod); diff --git a/llvmraytracing/lib/DXILSupport.cpp b/llvmraytracing/lib/DXILSupport.cpp index 4ffbef44b4..efc5a37c14 100644 --- a/llvmraytracing/lib/DXILSupport.cpp +++ b/llvmraytracing/lib/DXILSupport.cpp @@ -31,9 +31,7 @@ #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" -#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -46,12 +44,6 @@ using namespace llvm; #define DEBUG_TYPE "dxil-coro-split" -namespace llvm { -namespace coro { -bool defaultMaterializable(Instruction &V); -} // End 
namespace coro -} // End namespace llvm - /// Check if a value is in the given resource list. /// The metadata argument must be one of the lists from dx.resources, i.e. for /// SRVs. @@ -156,14 +148,8 @@ bool llvm::DXILMaterializable(Instruction &OrigI) { break; } - if (coro::defaultMaterializable(*V)) - return true; - - // Insert into constant. - if (isa(V) && - isa(V->getOperand(0))) { + if (commonMaterializable(*V)) return true; - } // Loads associated with dx.op.createHandle calls if (auto *LI = dyn_cast(V)) { diff --git a/llvmraytracing/lib/LegacyCleanupContinuations.cpp b/llvmraytracing/lib/LegacyCleanupContinuations.cpp index 6ef4b7b2b3..bec489fba9 100644 --- a/llvmraytracing/lib/LegacyCleanupContinuations.cpp +++ b/llvmraytracing/lib/LegacyCleanupContinuations.cpp @@ -40,16 +40,13 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/Builder.h" -#include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsDialect.h" #include "llvmraytracing/ContinuationsUtil.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/MathExtras.h" #include @@ -110,7 +107,6 @@ class LegacyCleanupContinuationsPassImpl { Function *Continue = nullptr; Function *WaitContinue = nullptr; MapVector ToProcess; - uint32_t MaxContStateBytes = 0; CompilerUtils::CrossModuleInliner CrossInliner; }; @@ -452,7 +448,8 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( getWithSamePointeeType( UsedContFrameTy, FuncData.NewContState->getType()->getPointerAddressSpace())); - replaceAllPointerUses(&B, ContFrame, CastNewContState, InstsToRemove); + CompilerUtils::replaceAllPointerUses(&B, ContFrame, CastNewContState, + InstsToRemove); } else { // If there is no continuation state, replace it with a poison // value instead of a 
zero-sized stack allocation. @@ -505,8 +502,6 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( uint64_t NeededStackSize = Data.getContStateStackBytes(); bool IsStart = F == Data.NewStart; - int64_t StackOffsetForPayloadSpill = 0; - if (IsStart) { // Add function metadata that stores how big the continuation state is in // bytes @@ -519,15 +514,18 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( } if (NeededStackSize) { + Value *ContStateOnStack = nullptr; if (IsStart) { ContHelper::setStackSize(F, NeededStackSize); + + ContStateOnStack = + B.create(B.getInt32(NeededStackSize)); } else { - // Deallocate - B.create(B.getInt32(NeededStackSize)); + ContStateOnStack = + B.create(B.getInt32(NeededStackSize)); } - Value *ContStateOnStack = - B.create(B.getInt32(-StackOffsetForPayloadSpill)); + ContStateOnStack->setName("cont.state.stack.segment"); uint64_t ContStateNumI32s = divideCeil(Data.ContStateBytes, RegisterBytes); auto *ContStateTy = ArrayType::get(I32, ContStateNumI32s); @@ -589,14 +587,6 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( // Pass resume address as argument B.SetInsertPoint(Call); - // Allocate continuation state - uint64_t NeededStackSize = Data.getContStateStackBytes(); - if (NeededStackSize) { - auto *ContStateAlloc = - B.create(B.getInt32(NeededStackSize)); - ContStateAlloc->setName("cont.state.stack.segment"); - } - auto *ReturnAddrInt = B.CreatePtrToInt(ResumeFun, I64); bool IsWait = ContHelper::isWaitAwaitCall(*Call); @@ -641,9 +631,15 @@ void LegacyCleanupContinuationsPassImpl::handleReturn(ContinuationData &Data, LLVM_DEBUG(dbgs() << "Converting return to continue: " << *ContRet << "\n"); bool IsEntry = isa(ContRet->getArgOperand(0)); B.SetInsertPoint(ContRet); + + uint32_t NeededStackSize = Data.getContStateStackBytes(); + if (NeededStackSize > 0) + B.create(B.getInt32(NeededStackSize)); + if (IsEntry) { assert(ContRet->arg_size() == 1 && "Entry functions ignore the return value"); + 
llvm::terminateShader(B, ContRet); } else { // Create the call to continuation.continue, but with the same argument list @@ -694,8 +690,6 @@ llvm::PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { // Check if the continuation state is used in any function part for (auto &FuncData : ToProcess) { finalizeContinuationData(*FuncData.first, FuncData.second); - MaxContStateBytes = - std::max(MaxContStateBytes, FuncData.second.ContStateBytes); } Changed |= !ToProcess.empty(); diff --git a/llvmraytracing/lib/LgcCpsDialect.cpp b/llvmraytracing/lib/LgcCpsDialect.cpp index 7d55bb321b..3112541640 100644 --- a/llvmraytracing/lib/LgcCpsDialect.cpp +++ b/llvmraytracing/lib/LgcCpsDialect.cpp @@ -24,9 +24,11 @@ **********************************************************************************************************************/ #include "lgc/LgcCpsDialect.h" +#include "lgc/LgcRtDialect.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" @@ -39,12 +41,10 @@ #include "LgcCpsDialect.cpp.inc" using namespace llvm; +using namespace lgc::rt; constexpr const char CpsMetadata[] = "lgc.cps"; -// The maximum amount of dwords usable for passing arguments -constexpr int MaxArgumentDwords = 32; - // ===================================================================================================================== // Helper to determine how many dwords we require to store a variable of a given // type. Note that this does not include any padding except for pointers. @@ -160,21 +160,23 @@ lgc::cps::CpsLevel lgc::cps::getCpsLevelFromFunction(const Function &fn) { // ===================================================================================================================== // Transform a shader type into the corresponding CPS level. 
-lgc::cps::CpsLevel lgc::cps::getCpsLevelForShaderStage(CpsShaderStage stage) { - if (stage == CpsShaderStage::RayGen) +lgc::cps::CpsLevel +lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { + if (stage == RayTracingShaderStage::RayGeneration) return CpsLevel::RayGen; - if (stage == CpsShaderStage::Traversal) + if (stage == RayTracingShaderStage::Traversal) return CpsLevel::Traversal; - if (stage == CpsShaderStage::ClosestHit || stage == CpsShaderStage::Miss || - stage == CpsShaderStage::Callable) + if (stage == RayTracingShaderStage::ClosestHit || + stage == RayTracingShaderStage::Miss || + stage == RayTracingShaderStage::Callable) return CpsLevel::ClosestHit_Miss_Callable; - if (stage == CpsShaderStage::AnyHit) + if (stage == RayTracingShaderStage::AnyHit) return CpsLevel::AnyHit_CombinedIntersection_AnyHit; - if (stage == CpsShaderStage::Intersection) + if (stage == RayTracingShaderStage::Intersection) return CpsLevel::Intersection; llvm_unreachable("Cannot determine CPS level."); @@ -183,31 +185,40 @@ lgc::cps::CpsLevel lgc::cps::getCpsLevelForShaderStage(CpsShaderStage stage) { // ===================================================================================================================== // Tries to convert a shader stage into the corresponding CPS levels in which // the continued-to function can operate. 
-uint8_t lgc::cps::getPotentialCpsReturnLevels(lgc::cps::CpsShaderStage stage) { +uint8_t lgc::cps::getPotentialCpsReturnLevels(RayTracingShaderStage stage) { std::bitset<8> CpsLevels; auto SetLevel = [&CpsLevels](CpsLevel Level) -> void { CpsLevels.set(static_cast(Level)); }; - if (stage == CpsShaderStage::RayGen) { - SetLevel(CpsLevel::Traversal); - } else if (stage == CpsShaderStage::ClosestHit) { - SetLevel(CpsLevel::Traversal); - SetLevel(CpsLevel::ClosestHit_Miss_Callable); - } else if (stage == CpsShaderStage::Callable) { - SetLevel(CpsLevel::ClosestHit_Miss_Callable); - } else if (stage == CpsShaderStage::AnyHit) { + switch (stage) { + case RayTracingShaderStage::RayGeneration: + llvm_unreachable("RayGen does not return."); + break; + case RayTracingShaderStage::Callable: + // Callable returns to wherever CallShader is called (all stages except AHS + // and IS). + case RayTracingShaderStage::ClosestHit: + case RayTracingShaderStage::Miss: + case RayTracingShaderStage::Traversal: + // These stages returns to wherever TraceRay is called (RGS, CHS and miss). + SetLevel(CpsLevel::RayGen); SetLevel(CpsLevel::ClosestHit_Miss_Callable); + break; + case RayTracingShaderStage::AnyHit: + // AHS returns to Traversal (triangle intersection) or IS (procedural + // intersection). + SetLevel(CpsLevel::Traversal); SetLevel(CpsLevel::Intersection); - } else if (stage == CpsShaderStage::Intersection) { - SetLevel(CpsLevel::ClosestHit_Miss_Callable); - SetLevel(CpsLevel::AnyHit_CombinedIntersection_AnyHit); - } else if (stage == CpsShaderStage::Miss) { + break; + case RayTracingShaderStage::Intersection: + // IS returns to Traversal only. 
SetLevel(CpsLevel::Traversal); - SetLevel(CpsLevel::ClosestHit_Miss_Callable); - } else { + break; + default: llvm_unreachable("Cannot determine CPS level."); + break; } return static_cast(CpsLevels.to_ulong()); @@ -250,3 +261,21 @@ Value *lgc::cps::popStateFromCpsStack(llvm_dialects::Builder &builder, return NewState; } + +// ===================================================================================================================== +// Lower lgc.cps.as.continuation.reference operations into an integer +// representation of the pointer or a passed relocation. Return the new +// reference. +Value *lgc::cps::lowerAsContinuationReference( + IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCrOp, + Value *Relocation) { + Builder.SetInsertPoint(&AsCrOp); + Value *Reference = nullptr; + + if (Relocation) + Reference = Relocation; + else + Reference = Builder.CreatePtrToInt(AsCrOp.getFn(), AsCrOp.getType()); + + return Reference; +} diff --git a/llvmraytracing/lib/LgcRtDialect.cpp b/llvmraytracing/lib/LgcRtDialect.cpp index c73f40059e..7d4ee1c3ec 100644 --- a/llvmraytracing/lib/LgcRtDialect.cpp +++ b/llvmraytracing/lib/LgcRtDialect.cpp @@ -173,15 +173,15 @@ void lgc::rt::setShaderArgSize(Function *func, size_t size) { // ============================================================================================== // Get attribute size (in bytes) metadata for a ray-tracing shader function. 
-size_t lgc::rt::getShaderHitAttributeSize(const Function *func) { +std::optional lgc::rt::getShaderHitAttributeSize(const Function *func) { MDNode *node = func->getMetadata(AttributeSizeMetadata); if (!node) - return 0; + return std::nullopt; if (auto *value = mdconst::dyn_extract(node->getOperand(0))) return value->getZExtValue(); - return 0; + return std::nullopt; } // ============================================================================================== diff --git a/llvmraytracing/lib/LowerAwait.cpp b/llvmraytracing/lib/LowerAwait.cpp index 667cc8f475..1ff9b8fbd5 100644 --- a/llvmraytracing/lib/LowerAwait.cpp +++ b/llvmraytracing/lib/LowerAwait.cpp @@ -37,7 +37,6 @@ #include "compilerutils/CompilerUtils.h" #include "lgc/LgcCpsDialect.h" #include "llvm-dialects/Dialect/Builder.h" -#include "llvm-dialects/Dialect/Dialect.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsDialect.h" @@ -46,7 +45,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include using namespace llvm; diff --git a/llvmraytracing/lib/LowerRaytracingPipeline.cpp b/llvmraytracing/lib/LowerRaytracingPipeline.cpp index 8308378230..bb3791ea4d 100644 --- a/llvmraytracing/lib/LowerRaytracingPipeline.cpp +++ b/llvmraytracing/lib/LowerRaytracingPipeline.cpp @@ -27,8 +27,7 @@ // // This file implements the frontend part for coroutine support for lgc.rt ops. // - Add a global for the continuation stack pointer. -// - Introduce a global for the payload. Mark the payload global to be -// transformed by the RegisterBufferPass. +// - Introduce a global for the payload. // - Replace traceRay or callShader function calls with a compiler generated // code snippet. 
The snippets call setup and teardown hooks and calls await to // mark the continuation point @@ -61,7 +60,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -82,6 +80,24 @@ using namespace lgc::cps; using namespace lgc::rt; namespace { + +// Create a GEP if I is non-null, otherwise return the pointer. +static Value *SimplifyingCreateConstGEP1_32(IRBuilder<> &B, Type *Ty, + Value *Ptr, uint32_t I) { + // A GEP with a single zero index is redundant with opaque pointers + if (I == 0) + return Ptr; + return B.CreateConstGEP1_32(Ty, Ptr, I); +} + +static Value *SimplifyingCreateConstInBoundsGEP1_32(IRBuilder<> &B, Type *Ty, + Value *Ptr, uint32_t I) { + // A GEP with a single zero index is redundant with opaque pointers + if (I == 0) + return Ptr; + return B.CreateConstInBoundsGEP1_32(Ty, Ptr, I); +} + // Helper struct to avoid recursively passing these arguments struct PayloadCopyHelper { Module &M; @@ -97,6 +113,10 @@ struct PayloadCopyHelper { SmallDenseSet *CopiedNodes; Value *Serialization; const PAQSerializationLayout *Layout; + // Pointer to the spilled payload (loaded from LocalPayload) + Value *SpilledPayloadPtr; + // Number of registers/dwords that are stored in registers + uint32_t PayloadRegisterCount; void copyPayloadRecursively(const PAQNode *Node) { if (CopiedNodes && CopiedNodes->contains(Node)) { @@ -132,70 +152,59 @@ struct PayloadCopyHelper { // Perform copy for each index interval (i.e, for each contiguous range of // storage memory) void copyField(Type *FieldTy, const PAQIndexIntervals &Intervals) { - auto *I32 = Type::getInt32Ty(M.getContext()); // Pointer to the node field in the local payload auto *LocalFieldPtr = B.CreateInBoundsGEP(&PayloadTy, LocalPayload, PayloadIdxList); - // If the field is serialized in multiple intervals in the global, - // we perform a manual 
bytewise copy using i32 and i8. - // However, if the field is serialized using a single, contiguous interval - // and does not have stricter alignment requirements than i32, - // then we can just load/store the field type from/to the global storage. - // - // We currently restrict this mechanism to single-DWord fields to avoid - // issues with the RegisterBuffer pass which struggles with loads and stores - // of large vector types, leading to bad IR with additional allocas. - // TODO: Remove this restriction once we have moved to LLPC-style - // continuations without the RegisterBuffer pass. - const DataLayout &DL = M.getDataLayout(); - if (Intervals.size() == 1 && - DL.getABITypeAlign(FieldTy) <= DL.getABITypeAlign(I32) && - Intervals[0].size() == 1) { - - // Do a single load+store - Value *Src = LocalFieldPtr; - - auto *GlobalIntervalI32Ptr = B.CreateInBoundsGEP( - Layout->SerializationTy, Serialization, - {B.getInt32(0), B.getInt32(0), B.getInt32(Intervals[0].Begin)}); - Value *Dst = B.CreateBitCast( - GlobalIntervalI32Ptr, - FieldTy->getPointerTo( - GlobalIntervalI32Ptr->getType()->getPointerAddressSpace())); - - if (GlobalAccessKind != PAQAccessKind::Write) - std::swap(Src, Dst); - - auto *Val = B.CreateLoad(FieldTy, Src); - B.CreateStore(Val, Dst); - return; - } - - // I32 pointer to start of field in local payload - Value *FieldI32Ptr = B.CreateBitCast( - LocalFieldPtr, - I32->getPointerTo(LocalFieldPtr->getType()->getPointerAddressSpace())); - // Counts how many bytes have already been copied unsigned FieldByteOffset = 0; unsigned FieldNumBytes = M.getDataLayout().getTypeStoreSize(FieldTy).getFixedValue(); - for (unsigned IntervalIdx = 0; IntervalIdx < Intervals.size(); - ++IntervalIdx) { - const PAQIndexInterval &Interval = Intervals[IntervalIdx]; - // I32 pointer to start of current interval in global payload - auto *GlobalIntervalI32Ptr = B.CreateInBoundsGEP( - Layout->SerializationTy, Serialization, - {B.getInt32(0), B.getInt32(0), 
B.getInt32(Interval.Begin)}); + + for (auto [IntervalIdx, CompleteInterval] : enumerate(Intervals)) { + copyFieldInterval(LocalFieldPtr, &FieldByteOffset, FieldNumBytes, + CompleteInterval); + } + + assert(FieldByteOffset == FieldNumBytes && "Inconsistent storage size!"); + } + + void copyFieldInterval(Value *LocalFieldPtr, unsigned *FieldByteOffset, + unsigned FieldNumBytes, + const PAQIndexInterval CompleteInterval) { + auto *I32 = Type::getInt32Ty(M.getContext()); + // Split interval into registers and memory part. + // Map an interval to its register or memory pointer. + SmallVector, 2> TmpIntervals; + + if (CompleteInterval.Begin < PayloadRegisterCount) { + PAQIndexInterval Interval = { + CompleteInterval.Begin, + std::min(CompleteInterval.End, PayloadRegisterCount)}; + // Pointer to start of current interval in global payload + auto *GlobalIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( + B, I32, Serialization, Interval.Begin); + TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); + } + if (CompleteInterval.End > PayloadRegisterCount) { + PAQIndexInterval Interval = { + std::max(CompleteInterval.Begin, PayloadRegisterCount), + CompleteInterval.End}; + // Pointer to start of current interval in global payload + auto *GlobalIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( + B, I32, SpilledPayloadPtr, Interval.Begin - PayloadRegisterCount); + TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); + } + + for (auto [Interval, GlobalIntervalI32Ptr] : TmpIntervals) { // Obtain i32-based index from byte-offset. 
We only expect // to increase FieldByteOffset by a non-multiple of RegisterBytes // in the last iteration, so here it should always be divisible - unsigned FieldI32Offset = divideCeil(FieldByteOffset, RegisterBytes); - assert(FieldByteOffset == FieldI32Offset * RegisterBytes); + unsigned FieldI32Offset = *FieldByteOffset / RegisterBytes; + assert(*FieldByteOffset == FieldI32Offset * RegisterBytes); // I32 pointer into field, offset by FieldI32Offset - auto *FieldIntervalI32Ptr = - B.CreateConstGEP1_32(I32, FieldI32Ptr, FieldI32Offset); + auto *FieldIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( + B, I32, LocalFieldPtr, FieldI32Offset); // Determine Src and Dst auto *Src = FieldIntervalI32Ptr; @@ -205,18 +214,14 @@ struct PayloadCopyHelper { unsigned NumCopyBytes = RegisterBytes * Interval.size(); - unsigned FieldNumRemainingBytes = FieldNumBytes - FieldByteOffset; + unsigned FieldNumRemainingBytes = FieldNumBytes - *FieldByteOffset; if (NumCopyBytes > FieldNumRemainingBytes) { - assert(IntervalIdx + 1 == Intervals.size() && - "Partial storage interval is only allowed for last interval!"); NumCopyBytes = FieldNumRemainingBytes; } copyBytes(B, Dst, Src, NumCopyBytes); - FieldByteOffset += NumCopyBytes; + *FieldByteOffset += NumCopyBytes; } - - assert(FieldByteOffset == FieldNumBytes && "Inconsistent storage size!"); } }; @@ -293,7 +298,7 @@ class LowerRaytracingPipelinePassImpl final { }; struct FunctionData { - DXILShaderKind Kind = DXILShaderKind::Invalid; + RayTracingShaderStage Kind = RayTracingShaderStage::Count; SmallVector TraceRayCalls; SmallVector ReportHitCalls; SmallVector CallShaderCalls; @@ -301,6 +306,7 @@ class LowerRaytracingPipelinePassImpl final { SmallVector IntrinsicCalls; SmallVector ShaderIndexCalls; SmallVector ShaderRecordBufferCalls; + SmallVector JumpCalls; /// Pointer to the alloca'd system data object in this function AllocaInst *SystemData = nullptr; @@ -327,6 +333,12 @@ class LowerRaytracingPipelinePassImpl final { /// hit 
attributes type, incoming for AnyHit and ClosestHit, outgoing for /// Intersection Type *HitAttributes = nullptr; + + /// The payload storage and its type belongs to this function. + Value *PayloadStorage = nullptr; + Type *PayloadStorageTy = nullptr; + /// The starting dword of payload storage argument + uint32_t FirstPayloadArgumentDword = 0; }; /// Needed data for handling the end of a function @@ -364,14 +376,13 @@ class LowerRaytracingPipelinePassImpl final { void collectGpuRtFunctions(); - // Copy the payload content between global payload and local payload. - // Excludes the stack pointer or hit attributes which may also reside in - // payload storage. If Stage is not set, all fields in SerializationInfo are - // copied. Used for CallShader accesses which are not PAQ qualified and do not - // have PAQShaderStage values. - // If CopiedNodes is set, nodes contained will not be copied, and all copied - // nodes are added to it. - void copyPayload(Type &PayloadTy, Value *LocalPayload, + // Copy the payload content between (global) payload storage and local + // payload. Excludes the stack pointer or hit attributes which may also reside + // in payload storage. If Stage is not set, all fields in SerializationInfo + // are copied. Used for CallShader accesses which are not PAQ qualified and do + // not have PAQShaderStage values. If CopiedNodes is set, nodes contained will + // not be copied, and all copied nodes are added to it. + void copyPayload(Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, std::optional Stage, PAQAccessKind GlobalAccessKind, const PAQSerializationLayout &Layout, @@ -385,7 +396,7 @@ class LowerRaytracingPipelinePassImpl final { // copied (i.e. only copying write(miss) : read(caller) fields). 
void copyTraceRayPayloadIncomingToCaller( const PAQTraceRaySerializationInfo &PAQSerializationInfo, - Value *LocalPayload); + Value *LocalPayload, Value *PayloadStorage); // Caller-save payload registers before CallShader() or TraceRay(), // which can override payload registers. A register needs to be saved @@ -393,12 +404,14 @@ class LowerRaytracingPipelinePassImpl final { // This includes the payload memory pointer if present. // SavedRegisters maps indices of payload registers to their saved values. void savePayloadRegistersBeforeRecursion( - DXILShaderKind Kind, const PAQSerializationLayout &IncomingLayout, + Value *PayloadStorage, RayTracingShaderStage Kind, + const PAQSerializationLayout &IncomingLayout, const PAQSerializationLayout &OutgoingLayout, SmallVectorImpl &SavedRegisterValues); // Restore previously saved registers. void restorePayloadRegistersAfterRecursion( + Value *PayloadStorage, const SmallVectorImpl &SavedRegisterValues); void createPayloadGlobal(); @@ -421,7 +434,7 @@ class LowerRaytracingPipelinePassImpl final { Instruction *insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, Instruction *Call, ArrayRef Args, ContinuationCallType CallType, - lgc::cps::CpsShaderStage ShaderStage); + RayTracingShaderStage ShaderStage); MapVector ToProcess; Module *Mod; @@ -438,7 +451,10 @@ class LowerRaytracingPipelinePassImpl final { Type *TraversalDataTy; /// System data type passed to ClosestHit and Miss Type *HitMissDataTy; - GlobalVariable *Payload; + /// Dispatch system data type passed to RayGen and others + Type *DispatchSystemDataTy; + + GlobalVariable *PayloadStorageGlobal; // Function definitions and declarations from HLSL // Driver implementation that returns if AcceptHitAndEndSearch was called @@ -449,15 +465,12 @@ class LowerRaytracingPipelinePassImpl final { Function *SetTriangleHitAttributes; Function *GetLocalRootIndex; Function *SetLocalRootIndex; - Function *SetupRayGen; Function *TraceRay; Function *CallShader; Function *ReportHit; Function 
*AcceptHit; Function *GetSbtAddress; Function *GetSbtStride; - - Function *RegisterBufferSetPointerBarrier; }; } // namespace @@ -499,33 +512,11 @@ void ModuleMetadataState::updateModuleMetadata() const { ContHelper::setStackAddrspace(Mod, StackAddrspace); } -lgc::cps::CpsShaderStage -convertShaderKindToCpsShaderStage(DXILShaderKind Kind) { - switch (Kind) { - case DXILShaderKind::RayGeneration: - return CpsShaderStage::RayGen; - case DXILShaderKind::Intersection: - return CpsShaderStage::Intersection; - case DXILShaderKind::AnyHit: - return CpsShaderStage::AnyHit; - case DXILShaderKind::ClosestHit: - return CpsShaderStage::ClosestHit; - case DXILShaderKind::Miss: - return CpsShaderStage::Miss; - case DXILShaderKind::Callable: - return CpsShaderStage::Callable; - default: - llvm_unreachable( - "convertShaderKindToCpsShaderStage: Invalid shader kind provided!"); - break; - } -} - // Create a lgc.cps.await operation for a given shader address. Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait( Type *ReturnTy, Value *ShaderAddr, Instruction *Call, ArrayRef Args, ContinuationCallType CallType, - CpsShaderStage ShaderStage) { + RayTracingShaderStage ShaderStage) { Builder.SetInsertPoint(Call); Value *CR = nullptr; @@ -534,15 +525,15 @@ Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait( else CR = ShaderAddr; - CpsShaderStage CallStage = CpsShaderStage::Count; + RayTracingShaderStage CallStage = RayTracingShaderStage::Count; if (CallType == ContinuationCallType::Traversal) - CallStage = CpsShaderStage::Traversal; + CallStage = RayTracingShaderStage::Traversal; else if (CallType == ContinuationCallType::CallShader) - CallStage = CpsShaderStage::Callable; + CallStage = RayTracingShaderStage::Callable; else if (CallType == ContinuationCallType::AnyHit) - CallStage = CpsShaderStage::AnyHit; + CallStage = RayTracingShaderStage::AnyHit; - assert(CallStage != CpsShaderStage::Count && + assert(CallStage != RayTracingShaderStage::Count && 
"LowerRaytracingPipelinePassImpl::insertCpsAwait: Invalid " "call stage before inserting lgc.cps.await operation!"); @@ -712,15 +703,14 @@ void LowerRaytracingPipelinePassImpl::handleRestoreSystemData(CallInst *Call) { auto *SystemData = Call->getArgOperand(0); // Set local root signature on re-entry - assert(GetLocalRootIndex && "Could not find GetLocalRootIndex function"); auto *LocalIndexSystemDataTy = cast(getFuncArgPtrElementType(GetLocalRootIndex, 0)); auto *LocalIndexSystemData = getDXILSystemData( Builder, SystemData, SystemDataTy, LocalIndexSystemDataTy); - auto Stage = lgc::rt::getLgcRtShaderStage(Call->getFunction()); + auto Stage = getLgcRtShaderStage(Call->getFunction()); Value *LocalIndex = nullptr; - if (Stage == lgc::rt::RayTracingShaderStage::RayGeneration) + if (Stage == RayTracingShaderStage::RayGeneration) LocalIndex = Builder.getInt32(0); else LocalIndex = @@ -736,7 +726,6 @@ void LowerRaytracingPipelinePassImpl::handleRestoreSystemData(CallInst *Call) { void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, CallInst *Call) { assert(ReportHit && "ReportHit not found"); - Function *F = Call->getFunction(); replaceCall(Data, Call, ReportHit, ContinuationCallType::AnyHit); @@ -750,37 +739,10 @@ void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, Instruction *Then = SplitBlockAndInsertIfThen(IsEnd, &*Builder.GetInsertPoint(), true); Builder.SetInsertPoint(Then); - SystemData = getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, - cast(Data.ReturnTy)); - Value *RetSystemData = Builder.CreateLoad(Data.ReturnTy, SystemData); - - Instruction *Ret = nullptr; - if (MetadataState.isInLgcCpsMode()) { - uint32_t CpsRetLevel = getPotentialCpsReturnLevels( - convertShaderKindToCpsShaderStage(Data.Kind)); - // When jumping to the ReturnAddress of parent function (i.e. the resume - // part of the caller of the parent function), the RCR and ShaderIndex - // are not important anymore, just pass Poison. 
- // Argument list: %rcr, %shader-index, %system-data. - SmallVector TailArgs = {PoisonValue::get(I32), - PoisonValue::get(I32), RetSystemData}; - Ret = Builder.create( - F->getArg(CpsArgIdxReturnAddr), CpsRetLevel, - PoisonValue::get(StructType::get(Builder.getContext())), TailArgs); - Builder.CreateUnreachable(); - } else { - Ret = Builder.CreateRet(RetSystemData); - } - - // Assume worst-case payload size for Intersection. See the note on the - // incoming payload size. - ContHelper::setOutgoingRegisterCount( - Ret, MetadataState.getMaxPayloadRegisterCount()); - // Intentionally do NOT update MaxUsedPayloadRegisterCount: Intersection - // assumes the worst-case size, but this doesn't mean it actually occurs. - // Remove trailing unreachable - Then->eraseFromParent(); + FunctionEndData EData; + EData.Terminator = Then; + processFunctionEnd(Data, EData); } /// Replace a call to Await with @@ -860,21 +822,19 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( // Copy to payload storage Value *CastPayload = Builder.CreateBitCast( - Payload, I32->getPointerTo(Payload->getAddressSpace())); + Data.PayloadStorage, + I32->getPointerTo( + Data.PayloadStorage->getType()->getPointerAddressSpace())); Builder.CreateStore(LocalPayloadMem, CastPayload); - // Barrier to ensure that accesses to the potentially in-memory parts of - // the payload are not re-ordered before this store. More precisely, later - // we will insert a load to the payload memory pointer at these accesses. - // These loads must be after the store. - Builder.CreateCall(RegisterBufferSetPointerBarrier, {Payload}); // Set stacksize metadata on F setStacksizeMetadata(*Call->getFunction(), Data.PayloadSpillSize); } // Copy local payload to global payload, before await call (e.g. 
TraceRay, // CallShader) - copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, ShaderStage, - PAQAccessKind::Write, *OutgoingSerializationLayout); + copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, + ShaderStage, PAQAccessKind::Write, + *OutgoingSerializationLayout); } auto *ShaderAddr = Call->getArgOperand(0); @@ -883,11 +843,21 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( SmallVector ArgTys; SmallVector Args; + bool IsWait = + (Call->getCalledFunction()->getName().starts_with("_AmdWaitAwait")); + if (MetadataState.isInLgcCpsMode()) { - // For LgcCps, skip function-addr and also return-addr/wait-mask, the - // return-addr will be filled at late stage of continuation transform. Add - // shader-index so that the callee cps function get correct shader-index - // being passed in. + // For LgcCps, skip function-addr, the return address will be filled at late + // stage of continuation transform. Add shader index so that the callee cps + // function get correct shader-index being passed in. + + // Append the wait mask to the begin of the tail args. 
+ if (IsWait) { + constexpr static uint32_t WaitMaskIdx = 1; + ArgTys.push_back(FTy->getParamType(WaitMaskIdx)); + Args.push_back(Call->getArgOperand(WaitMaskIdx)); + } + ArgTys.push_back(I32); auto *ShaderIndex = CrossInliner @@ -906,7 +876,7 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( Args.append(Call->arg_begin() + 1, Call->arg_end()); } - auto *SystemDataTy = SetupRayGen->getReturnType(); + auto *SystemDataTy = DispatchSystemDataTy; if (CallType == ContinuationCallType::AnyHit) { assert(TraversalDataTy && "Failed to detect traversal system data type"); SystemDataTy = TraversalDataTy; @@ -917,11 +887,48 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( } Instruction *Annotatable = nullptr; - Instruction *NewCall = nullptr; + Value *NewCall = nullptr; + if (MetadataState.isInLgcCpsMode()) { - NewCall = insertCpsAwait(Call->getType(), ShaderAddr, Call, Args, CallType, - convertShaderKindToCpsShaderStage(Data.Kind)); - Annotatable = NewCall; + // Add padding so that payload starts at a fixed dword. + // NOTE: Minus 1 as return address is not included + ContHelper::addPaddingType(*DL, *Context, ArgTys, + Data.FirstPayloadArgumentDword - 1); + Args.push_back(PoisonValue::get(ArgTys.back())); + + // Put payload at last + auto OutgoingPayloadI32s = + std::min(OutgoingSerializationLayout + ? OutgoingSerializationLayout->NumStorageI32s + : MetadataState.getMaxPayloadRegisterCount(), + MetadataState.getMaxPayloadRegisterCount()); + auto *OutgoingPayloadTy = ArrayType::get(I32, OutgoingPayloadI32s); + ArgTys.push_back(OutgoingPayloadTy); + Args.push_back(Builder.CreateLoad(OutgoingPayloadTy, Data.PayloadStorage)); + + auto *OrigRetTy = Call->getType(); + + // Add padding so that returned payload starts at a fixed dword. + // NOTE: Minus 2 as return address and shader index are not included. 
+ auto *PaddingTy = ContHelper::getPaddingType( + *DL, *Context, OrigRetTy, Data.FirstPayloadArgumentDword - 2); + + // Also need to return payload + auto *NewRetTy = + StructType::get(Builder.getContext(), + {OrigRetTy, PaddingTy, + ArrayType::get(I32, ReturnedRegisterCount.value())}); + + Annotatable = + insertCpsAwait(NewRetTy, ShaderAddr, Call, Args, CallType, Data.Kind); + + NewCall = Annotatable; + + auto *ReturnedPayload = Builder.CreateExtractValue(NewCall, 2); + // Store returned payload to make the payload argument being updated. + Builder.CreateStore(ReturnedPayload, Data.PayloadStorage); + + NewCall = Builder.CreateExtractValue(NewCall, 0); } else { auto *ShaderTy = FunctionType::get(TokenTy, ArgTys, false); auto *ShaderFun = @@ -932,13 +939,13 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( getContinuationAwait(*Mod, TokenTy, cast(SystemDataTy)); NewCall = Builder.CreateCall(Await, {Token}); Annotatable = Token; - - // For WaitAwait, add metadata indicating that we wait. After coroutine - // passes, we then generate a waitContinue on the awaited function. - if (Call->getCalledFunction()->getName().starts_with("_AmdWaitAwait")) - ContHelper::setIsWaitAwaitCall(*Token); } + // For WaitAwait, add metadata indicating that we wait. After coroutine + // passes, we then generate a waitContinue on the awaited function. 
+ if (IsWait) + ContHelper::setIsWaitAwaitCall(*cast(Annotatable)); + ContHelper::setReturnedRegisterCount(Annotatable, ReturnedRegisterCount.value()); @@ -963,12 +970,13 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( if (CallType == ContinuationCallType::CallShader) { // For CallShader, there is only a single layout // Copy global payload to local payload, after CallShader call - copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, ShaderStage, - PAQAccessKind::Read, *OutgoingSerializationLayout); + copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, + ShaderStage, PAQAccessKind::Read, + *OutgoingSerializationLayout); } else { copyTraceRayPayloadIncomingToCaller( *cast(OutgoingSerializationInfo), - PayloadOrAttrs); + PayloadOrAttrs, Data.PayloadStorage); } } @@ -981,7 +989,7 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( /// for LgcCps mode or get the value from system data for non-LgcCps mode. void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, CallInst *Call) { - if (Data.Kind == DXILShaderKind::RayGeneration) { + if (Data.Kind == RayTracingShaderStage::RayGeneration) { Call->replaceAllUsesWith(Builder.getInt32(0)); } else { Value *ShaderIndex = nullptr; @@ -1006,37 +1014,37 @@ void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, /// Replace a call to lgc.rt.shader.record.buffer with loading the resource. 
void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall( FunctionData &Data, CallInst *Call) { - auto shaderRecordBufferOp = cast(Call); - auto tableIndex = shaderRecordBufferOp->getShaderIndex(); + auto *ShaderRecordBuffer = cast(Call); + auto *TableIndex = ShaderRecordBuffer->getShaderIndex(); assert(GetSbtAddress && "Could not find GetSbtAddress function"); assert(GetSbtStride && "Could not find GetSbtStride function"); - Value *tableAddr = + Value *TableAddr = CrossInliner.inlineCall(Builder, GetSbtAddress).returnValue; - Value *tableStride = + Value *TableStride = CrossInliner.inlineCall(Builder, GetSbtStride).returnValue; // SBT starts with shader group handle (aka shader identifier), which is 32 // bytes, then the data for shader record buffer. constexpr unsigned ShaderIdEntrySizeInBytes = 32; - Value *shaderIdsSizeVal = Builder.getInt32(ShaderIdEntrySizeInBytes); + Value *ShaderIdsSizeVal = Builder.getInt32(ShaderIdEntrySizeInBytes); // Byte offset = (tableStride * tableIndex) + shaderIdsSize - Value *offset = Builder.CreateMul(tableIndex, tableStride); - offset = Builder.CreateAdd(offset, shaderIdsSizeVal); + Value *Offset = Builder.CreateMul(TableIndex, TableStride); + Offset = Builder.CreateAdd(Offset, ShaderIdsSizeVal); // Zero-extend offset value to 64 bit - offset = Builder.CreateZExt(offset, Builder.getInt64Ty()); + Offset = Builder.CreateZExt(Offset, Builder.getInt64Ty()); // Final addr - tableAddr = Builder.CreateAdd(tableAddr, offset); + TableAddr = Builder.CreateAdd(TableAddr, Offset); - Type *gpuAddrAsPtrTy = + Type *GpuAddrAsPtrTy = PointerType::get(Builder.getContext(), 1 /* ADDR_SPACE_GLOBAL */); - tableAddr = Builder.CreateIntToPtr(tableAddr, gpuAddrAsPtrTy); + TableAddr = Builder.CreateIntToPtr(TableAddr, GpuAddrAsPtrTy); - Call->replaceAllUsesWith(tableAddr); + Call->replaceAllUsesWith(TableAddr); Call->eraseFromParent(); } @@ -1067,7 +1075,7 @@ void LowerRaytracingPipelinePassImpl::handleGetShaderKind(Function &Func) { 
llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { Function *F = CInst.getFunction(); - auto Stage = lgc::rt::getLgcRtShaderStage(F); + auto Stage = getLgcRtShaderStage(F); // Ignore GetShaderKind calls where we cannot find the shader kind. // This happens e.g. in gpurt-implemented intrinsics that got inlined, @@ -1076,7 +1084,7 @@ void LowerRaytracingPipelinePassImpl::handleGetShaderKind(Function &Func) { return; DXILShaderKind ShaderKind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); auto *ShaderKindVal = ConstantInt::get(Func.getReturnType(), static_cast(ShaderKind)); CInst.replaceAllUsesWith(ShaderKindVal); @@ -1091,8 +1099,16 @@ void LowerRaytracingPipelinePassImpl::handleGetCurrentFuncAddr(Function &Func) { Func.getReturnType()->isIntegerTy(64))); llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { - auto *FuncPtrToInt = - ConstantExpr::getPtrToInt(CInst.getFunction(), Func.getReturnType()); + auto *F = CInst.getFunction(); + auto *RetTy = Func.getReturnType(); + Value *FuncPtrToInt = nullptr; + if (MetadataState.isInLgcCpsMode()) { + // Add CPS level to function address + Builder.SetInsertPoint(&CInst); + FuncPtrToInt = Builder.create(RetTy, F); + } else { + FuncPtrToInt = ConstantExpr::getPtrToInt(F, RetTy); + } CInst.replaceAllUsesWith(FuncPtrToInt); CInst.eraseFromParent(); }); @@ -1107,50 +1123,51 @@ void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, uint64_t NumFullI32s = NumBytes / RegisterBytes; // Copy full I32s for (uint64_t I32Index = 0; I32Index < NumFullI32s; ++I32Index) { - auto *DstPtr = B.CreateConstGEP1_64(I32, Dst, I32Index); - auto *SrcPtr = B.CreateConstGEP1_64(I32, Src, I32Index); + auto *DstPtr = SimplifyingCreateConstInBoundsGEP1_32(B, I32, Dst, I32Index); + auto *SrcPtr = SimplifyingCreateConstInBoundsGEP1_32(B, I32, Src, I32Index); auto *Val = B.CreateLoad(I32, SrcPtr); B.CreateStore(Val, DstPtr); } // Copy remaining bytes - uint64_t 
NumRemainingBytes = NumBytes - (NumFullI32s * RegisterBytes); + uint64_t NumRemainingBytes = NumBytes % RegisterBytes; if (NumRemainingBytes == 0) return; - // Create a packed struct containing NumRemainingBytes many i8, bitcast src - // and dst pointers (+ offset) to the packed struct, and load/store the - // struct. A similar technique is used in RegisterBufferPass. + // Create i8 loads and stores for the remaining bytes Type *I8 = B.getIntNTy(8); - SmallVector Elements; - Elements.resize(NumRemainingBytes, I8); - Type *StructTy = StructType::create(Elements, "", /* isPacked */ true); - - // Note: These pointers may not be dereferenced as I32s, because the I32s - // overlap past the end of the Dst and Src range - auto *DstStructPtr = B.CreateBitCast( - B.CreateConstGEP1_64(I32, Dst, NumFullI32s), - StructTy->getPointerTo(Dst->getType()->getPointerAddressSpace())); - auto *SrcStructPtr = B.CreateBitCast( - B.CreateConstGEP1_64(I32, Src, NumFullI32s), - StructTy->getPointerTo(Src->getType()->getPointerAddressSpace())); - - auto *Val = B.CreateLoad(StructTy, SrcStructPtr); - B.CreateStore(Val, DstStructPtr); + for (uint64_t I8Index = NumFullI32s * RegisterBytes; I8Index < NumBytes; + ++I8Index) { + auto *DstPtr = SimplifyingCreateConstGEP1_32(B, I8, Dst, I8Index); + auto *SrcPtr = SimplifyingCreateConstGEP1_32(B, I8, Src, I8Index); + auto *Val = B.CreateLoad(I8, SrcPtr); + B.CreateStore(Val, DstPtr); + } } void LowerRaytracingPipelinePassImpl::copyPayload( - Type &PayloadTy, Value *LocalPayload, std::optional Stage, - PAQAccessKind GlobalAccessKind, const PAQSerializationLayout &Layout, + Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, + std::optional Stage, PAQAccessKind GlobalAccessKind, + const PAQSerializationLayout &Layout, SmallDenseSet *CopiedNodes) { // Nothing to do if there is no serialization type, i.e. 
the layout is empty if (!Layout.SerializationTy) return; - // Obtain pointer to global payload serialization struct - Value *PayloadSerialization = Builder.CreateBitCast( - Payload, - Layout.SerializationTy->getPointerTo(Payload->getAddressSpace())); + LLVM_DEBUG(dbgs() << (GlobalAccessKind == PAQAccessKind::Read ? "Incoming" + : "Outgoing") + << " serialization layout of " + << cast(LocalPayload)->getFunction()->getName() + << ": " << *Layout.SerializationTy << "\n"); + + Value *SpilledPayloadPtr = nullptr; + if (Layout.PayloadMemPointerNode) { + auto *SpillPtr = SimplifyingCreateConstInBoundsGEP1_32( + Builder, Builder.getInt8Ty(), PayloadStorage, + FirstPayloadMemoryPointerRegister); + SpilledPayloadPtr = Builder.CreateLoad( + Builder.getPtrTy(lgc::cps::stackAddrSpace), SpillPtr); + } PayloadCopyHelper Helper{ *Mod, @@ -1161,15 +1178,17 @@ void LowerRaytracingPipelinePassImpl::copyPayload( GlobalAccessKind, {Builder.getInt32(0)}, CopiedNodes, - PayloadSerialization, + PayloadStorage, &Layout, + SpilledPayloadPtr, + MetadataState.getMaxPayloadRegisterCount(), }; Helper.copyPayloadRecursively(Layout.PayloadRootNode); } void LowerRaytracingPipelinePassImpl::copyTraceRayPayloadIncomingToCaller( - const PAQTraceRaySerializationInfo &SerializationInfo, - Value *LocalPayload) { + const PAQTraceRaySerializationInfo &SerializationInfo, Value *LocalPayload, + Value *PayloadStorage) { SmallDenseSet CopiedNodes; for (auto LayoutKind : {PAQSerializationLayoutKind::ClosestHitOut, @@ -1177,13 +1196,14 @@ void LowerRaytracingPipelinePassImpl::copyTraceRayPayloadIncomingToCaller( const PAQSerializationLayout &Layout = SerializationInfo.LayoutsByKind[LayoutKind]; copyPayload(*SerializationInfo.PayloadRootNode->Ty, LocalPayload, - PAQShaderStage::Caller, PAQAccessKind::Read, Layout, - &CopiedNodes); + PayloadStorage, PAQShaderStage::Caller, PAQAccessKind::Read, + Layout, &CopiedNodes); } } void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( - DXILShaderKind 
Kind, const PAQSerializationLayout &IncomingLayout, + Value *PayloadStorage, RayTracingShaderStage Kind, + const PAQSerializationLayout &IncomingLayout, const PAQSerializationLayout &OutgoingLayout, SmallVectorImpl &SavedRegisterValues) { @@ -1192,7 +1212,7 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( SavedRegisterValues.resize(MetadataState.getMaxPayloadRegisterCount()); - std::optional Stage = dxilShaderKindToPAQShaderStage(Kind); + std::optional Stage = rtShaderStageToPAQShaderStage(Kind); auto *RegTy = Builder.getIntNTy(RegisterBytes * 8); for (const auto &NodeWithStorageInfo : OutgoingLayout.NodeStorageInfos) { @@ -1218,7 +1238,7 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( ++I) { // Create backup of the I-th payload register auto *LoadPtr = - Builder.CreateConstGEP2_32(Payload->getValueType(), Payload, 0, I); + SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); auto *OldValue = Builder.CreateLoad(RegTy, LoadPtr); // As long as we keep a 32 bit alignment of all fields, all fields // get disjoint registers, and we should never save a register twice. 
@@ -1239,13 +1259,13 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( } void LowerRaytracingPipelinePassImpl::restorePayloadRegistersAfterRecursion( + Value *PayloadStorage, const SmallVectorImpl &SavedRegisterValues) { for (unsigned I = 0; I < SavedRegisterValues.size(); ++I) { Value *OldValue = SavedRegisterValues[I]; if (OldValue) { auto *StorePtr = - Builder.CreateGEP(Payload->getValueType(), Payload, - {Builder.getInt32(0), Builder.getInt32(I)}); + SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); Builder.CreateStore(SavedRegisterValues[I], StorePtr); } } @@ -1304,8 +1324,9 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( // Obtain pointer to global payload serialization struct Value *PayloadSerialization = Builder.CreateBitCast( - Payload, - Layout->SerializationTy->getPointerTo(Payload->getAddressSpace())); + Data.PayloadStorage, + Layout->SerializationTy->getPointerTo( + Data.PayloadStorage->getType()->getPointerAddressSpace())); // Last zero yields pointer to the first element of the i32 array PayloadHitAttrs = Builder.CreateInBoundsGEP( Layout->SerializationTy, PayloadSerialization, @@ -1316,7 +1337,7 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( // Inline attributes suffice, nothing to do. } } else { - assert(Data.Kind == DXILShaderKind::Intersection && + assert(Data.Kind == RayTracingShaderStage::Intersection && "Unexpected shader kind"); // We are in an intersection shader, which does not know the payload type. 
// Assume maximum possible size @@ -1324,8 +1345,8 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( Data.FuncConfig.MaxHitAttributeBytes - InlineHitAttrsBytes; // Use hit attribute storage at fixed index PayloadHitAttrs = - Builder.CreateConstGEP2_32(Payload->getValueType(), Payload, 0, - FirstPayloadHitAttributeStorageRegister); + SimplifyingCreateConstGEP1_32(Builder, I32, Data.PayloadStorage, + FirstPayloadHitAttributeStorageRegister); } uint64_t HitAttrsBytes = @@ -1337,14 +1358,15 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( LocalHitAttributes = Builder.CreateBitCast(LocalHitAttributes, RegTyPtr); auto *I8Ty = Builder.getInt8Ty(); for (unsigned I = 0; I < divideCeil(HitAttrsBytes, RegisterBytes); I++) { - auto *LocalPtr = - Builder.CreateConstInBoundsGEP1_64(RegTy, LocalHitAttributes, I); + auto *LocalPtr = SimplifyingCreateConstInBoundsGEP1_32( + Builder, RegTy, LocalHitAttributes, I); Value *GlobalPtr; if (I < InlineRegSize) - GlobalPtr = Builder.CreateConstInBoundsGEP1_64(RegTy, InlineHitAttrs, I); + GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, + InlineHitAttrs, I); else - GlobalPtr = Builder.CreateConstInBoundsGEP1_64(RegTy, PayloadHitAttrs, - I - InlineRegSize); + GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32( + Builder, RegTy, PayloadHitAttrs, I - InlineRegSize); auto *LoadPtr = GlobalToLocal ? GlobalPtr : LocalPtr; auto *StorePtr = GlobalToLocal ? 
LocalPtr : GlobalPtr; @@ -1358,10 +1380,11 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( auto *ByteStorePtr = Builder.CreateBitCast(StorePtr, I8Ty->getPointerTo()); for (unsigned J = 0; J < HitAttrsBytes % RegisterBytes; J++) { - auto *Val = Builder.CreateLoad( - I8Ty, Builder.CreateConstInBoundsGEP1_64(I8Ty, ByteLoadPtr, J)); - Builder.CreateStore( - Val, Builder.CreateConstInBoundsGEP1_64(I8Ty, ByteStorePtr, J)); + auto *Val = + Builder.CreateLoad(I8Ty, SimplifyingCreateConstInBoundsGEP1_32( + Builder, I8Ty, ByteLoadPtr, J)); + Builder.CreateStore(Val, SimplifyingCreateConstInBoundsGEP1_32( + Builder, I8Ty, ByteStorePtr, J)); } } } @@ -1395,24 +1418,15 @@ void LowerRaytracingPipelinePassImpl::createPayloadGlobal() { std::max(MaxPayloadI32s, FuncData.IncomingPayloadSerializationInfo->MaxStorageI32s); } - auto *PayloadTy = ArrayType::get(I32, MaxPayloadI32s); + auto *PayloadTy = + ArrayType::get(I32, MetadataState.getMaxPayloadRegisterCount()); - Payload = cast( + PayloadStorageGlobal = cast( Mod->getOrInsertGlobal(ContHelper::GlobalPayloadName, PayloadTy, [&] { - auto *Payload = new GlobalVariable( + return new GlobalVariable( *Mod, PayloadTy, false, GlobalVariable::ExternalLinkage, nullptr, ContHelper::GlobalPayloadName, nullptr, - GlobalVariable::NotThreadLocal); - - // Add registerbuffer metadata unconditionally to split all accesses - // into i32s - RegisterBufferMD RMD; - RMD.RegisterCount = MetadataState.getMaxPayloadRegisterCount(); - RMD.Addrspace = lgc::cps::stackAddrSpace; - auto *MD = createRegisterBufferMetadata(*Context, RMD); - Payload->addMetadata("registerbuffer", *MD); - - return Payload; + GlobalVariable::NotThreadLocal, GlobalRegisterAddrspace); })); } @@ -1438,19 +1452,14 @@ void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { uint32_t InRegisterCount = 0; uint32_t OutRegisterCount = 0; auto *Func = CI->getFunction(); - switch (lgc::rt::getLgcRtShaderStage(Func).value()) { - case 
lgc::rt::RayTracingShaderStage::Traversal: + switch (getLgcRtShaderStage(Func).value()) { + case RayTracingShaderStage::Traversal: InRegisterCount = MaxRegisterCount; OutRegisterCount = MaxRegisterCount; break; - case lgc::rt::RayTracingShaderStage::KernelEntry: + case RayTracingShaderStage::KernelEntry: InRegisterCount = 0; - // Technically, we could likely use zero registers instead, but it - // shouldn't make a difference for performance, and assuming the - // worst-case avoids nasty errors in case we do use payload registers - // for some reason. Longer term, we'll get rid of the REGISTERS global - // and register count metadata anyways. - OutRegisterCount = MaxRegisterCount; + OutRegisterCount = 0; break; default: continue; @@ -1472,7 +1481,6 @@ void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { void LowerRaytracingPipelinePassImpl::processContinuations() { TokenTy = StructType::create(*Context, "continuation.token")->getPointerTo(); - RegisterBufferSetPointerBarrier = getRegisterBufferSetPointerBarrier(*Mod); for (auto &FuncData : ToProcess) { processFunction(FuncData.first, FuncData.second); @@ -1499,7 +1507,7 @@ void LowerRaytracingPipelinePassImpl::processFunctionEntry( void LowerRaytracingPipelinePassImpl::processFunctionEnd( FunctionData &Data, FunctionEndData &EData) { AnyHitExitKind AHExitKind = AnyHitExitKind::None; - bool IsAnyHit = Data.Kind == DXILShaderKind::AnyHit; + bool IsAnyHit = Data.Kind == RayTracingShaderStage::AnyHit; if (IsAnyHit) { // Default to AcceptHit, which is only implicitly represented by @@ -1519,8 +1527,9 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( Builder.SetInsertPoint(EData.Terminator); auto *PayloadTy = Data.IncomingPayload; - if (Data.Kind != DXILShaderKind::RayGeneration && - Data.Kind != DXILShaderKind::Intersection) { + if (Data.Kind != RayTracingShaderStage::RayGeneration && + Data.Kind != RayTracingShaderStage::Intersection && + Data.Kind != RayTracingShaderStage::Traversal) 
{ assert(PayloadTy && "Missing payload type!"); if (IsAnyHit) { @@ -1543,13 +1552,14 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( // Restore saved registers. This needs to be done *before* copying // back the payload, which depends on the restored memory pointer! - restorePayloadRegistersAfterRecursion(EData.SavedRegisterValues); + restorePayloadRegistersAfterRecursion(Data.PayloadStorage, + EData.SavedRegisterValues); // Copy local payload into global payload at end of shader if (EData.OutgoingSerializationLayout->NumStorageI32s) { - Builder.CreateCall(RegisterBufferSetPointerBarrier, {Payload}); - copyPayload(*PayloadTy, EData.NewPayload, EData.ShaderStage, - PAQAccessKind::Write, *EData.OutgoingSerializationLayout); + copyPayload(*PayloadTy, EData.NewPayload, Data.PayloadStorage, + EData.ShaderStage, PAQAccessKind::Write, + *EData.OutgoingSerializationLayout); } if (IsAnyHit) { @@ -1581,14 +1591,17 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( } Instruction *Ret = nullptr; + unsigned OutgoingRegisterCount = + EData.OutgoingSerializationLayout + ? std::min(EData.OutgoingSerializationLayout->NumStorageI32s, + MetadataState.getMaxPayloadRegisterCount()) + : MetadataState.getMaxPayloadRegisterCount(); if (MetadataState.isInLgcCpsMode()) { - uint32_t CpsRetLevel = getPotentialCpsReturnLevels( - convertShaderKindToCpsShaderStage(Data.Kind)); - - if (Data.Kind == DXILShaderKind::RayGeneration) { + if (Data.Kind == RayTracingShaderStage::RayGeneration) { assert(!RetValue && "RayGen cannot return anything"); Builder.CreateRetVoid(); } else { + uint32_t CpsRetLevel = getPotentialCpsReturnLevels(Data.Kind); // Jump to resume point of caller, pass Poison Rcr and ShaderIndex as they // are not meaningful for the case. 
SmallVector TailArgs = {PoisonValue::get(I32), @@ -1596,7 +1609,14 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( if (RetValue) TailArgs.push_back(RetValue); - Builder.create( + // Add padding so that payload starts at a fixed dword. + ContHelper::addPaddingValue(*DL, *Context, TailArgs, + Data.FirstPayloadArgumentDword); + + TailArgs.push_back(Builder.CreateLoad( + ArrayType::get(I32, OutgoingRegisterCount), Data.PayloadStorage)); + + Ret = Builder.create( EData.Terminator->getFunction()->getArg(CpsArgIdxReturnAddr), CpsRetLevel, PoisonValue::get(StructType::get(Builder.getContext())), TailArgs); @@ -1610,11 +1630,6 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( // Annotate the terminator with number of outgoing payload registers. // This annotation will be passed along the following transformations, // ending up at the final continuation call. - unsigned OutgoingRegisterCount = - EData.OutgoingSerializationLayout - ? std::min(EData.OutgoingSerializationLayout->NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount()) - : MetadataState.getMaxPayloadRegisterCount(); ContHelper::setOutgoingRegisterCount(Ret, OutgoingRegisterCount); if (EData.OutgoingSerializationLayout) MetadataState.updateMaxUsedPayloadRegisterCount(OutgoingRegisterCount); @@ -1631,6 +1646,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, SmallVector AllArgTypes; Type *NewRetTy; Type *SystemDataTy = nullptr; + Type *CpsArgPayloadTy = nullptr; if (MetadataState.isInLgcCpsMode()) { // Create the CPS function header. @@ -1640,30 +1656,82 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // * Return continuation reference (RCR): i32 // * Shader index // * Remaining arguments (system data, optionally hit attributes) + // We need to determine the starting dword of payload storage in arguments, + // so that payload starts at a fixed VGPR across all shaders in a pipeline. 
+ // The overall layout is: + // | returnAddr | shaderIndex | systemData | hitAttrs | padding | payload | + // For systemData and hitAttrs, use the max possible sizes for calculation. AllArgTypes.push_back(StructType::get(Mod->getContext())); AllArgTypes.push_back(Builder.getInt32Ty()); AllArgTypes.push_back(Builder.getInt32Ty()); + + // Determine payload storage type + uint32_t PayloadStorageI32s = MetadataState.getMaxPayloadRegisterCount(); + PayloadStorageI32s = + std::max(PayloadStorageI32s, Data.MaxOutgoingPayloadI32s); + if (Data.IncomingPayloadSerializationInfo) + PayloadStorageI32s = + std::max(PayloadStorageI32s, + Data.IncomingPayloadSerializationInfo->MaxStorageI32s); + Data.PayloadStorageTy = ArrayType::get(I32, PayloadStorageI32s); + + // Determine payload starting dword + // NOTE: _AmdEnqueueAnyHit always passes a 2 dword barycentrics, need to + // take that into account in the situation that hit attributes are not used + // or less than 2 dwords. + assert(TraversalDataTy && "Failed to detect traversal system data type"); + Data.FirstPayloadArgumentDword = + 1 + 1 + getArgumentDwordCount(*DL, TraversalDataTy) + + std::max( + divideCeil(Data.FuncConfig.MaxHitAttributeBytes, RegisterBytes), + uint64_t(2)); + + unsigned IncomingStorageI32s = 0; + if (Data.Kind != RayTracingShaderStage::RayGeneration && + Data.Kind != RayTracingShaderStage::Intersection && + Data.Kind != RayTracingShaderStage::Traversal) { + const PAQSerializationLayout &IncomingSerializationLayout = + PAQManager.getOrCreateShaderStartSerializationLayout( + *Data.IncomingPayloadSerializationInfo, Data.Kind, + Data.HitAttributes); + IncomingStorageI32s = + std::min(IncomingSerializationLayout.NumStorageI32s, + MetadataState.getMaxPayloadRegisterCount()); + } else if (Data.Kind == RayTracingShaderStage::Traversal) { + // We should have set up preserved register count for Traversal, if not, + // fall back to max count. 
+ // Even if PreservedPayloadRegisterCount is set, there may be additional + // shaders in the current module whose usage is recorded in + // MaxUsedPayloadRegisterCount, to take the max with it. + IncomingStorageI32s = + std::max(MetadataState.tryGetPreservedPayloadRegisterCount().value_or( + MetadataState.getMaxPayloadRegisterCount()), + MetadataState.getMaxUsedPayloadRegisterCount()); + } else { + // For IS, use max size + IncomingStorageI32s = MetadataState.getMaxPayloadRegisterCount(); + } + CpsArgPayloadTy = ArrayType::get(I32, IncomingStorageI32s); } const auto SystemDataArgumentIndex = AllArgTypes.size(); switch (Data.Kind) { - case DXILShaderKind::RayGeneration: { - assert(SetupRayGen && "Could not find SetupRayGen function"); - SystemDataTy = SetupRayGen->getReturnType(); + case RayTracingShaderStage::RayGeneration: { + SystemDataTy = DispatchSystemDataTy; AllArgTypes.push_back(SystemDataTy); NewRetTy = Builder.getVoidTy(); break; } - case DXILShaderKind::Intersection: { + case RayTracingShaderStage::Intersection: { assert(TraversalDataTy && "Failed to detect traversal system data type"); SystemDataTy = TraversalDataTy; AllArgTypes.push_back(SystemDataTy); NewRetTy = SystemDataTy; break; } - case DXILShaderKind::AnyHit: { + case RayTracingShaderStage::AnyHit: { assert(TraversalDataTy && "Failed to detect traversal system data type"); SystemDataTy = TraversalDataTy; AllArgTypes.push_back(SystemDataTy); @@ -1671,18 +1739,22 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, NewRetTy = SystemDataTy; break; } - case DXILShaderKind::ClosestHit: - case DXILShaderKind::Miss: { + case RayTracingShaderStage::ClosestHit: + case RayTracingShaderStage::Miss: { assert(HitMissDataTy && "Failed to detect hit/miss system data type"); - assert(SetupRayGen && "Could not find SetupRayGen function"); SystemDataTy = HitMissDataTy; AllArgTypes.push_back(SystemDataTy); - NewRetTy = SetupRayGen->getReturnType(); + NewRetTy = DispatchSystemDataTy; + break; + } 
+ case RayTracingShaderStage::Callable: { + SystemDataTy = DispatchSystemDataTy; + AllArgTypes.push_back(SystemDataTy); + NewRetTy = SystemDataTy; break; } - case DXILShaderKind::Callable: { - assert(SetupRayGen && "Could not find SetupRayGen function"); - SystemDataTy = SetupRayGen->getReturnType(); + case RayTracingShaderStage::Traversal: { + SystemDataTy = getFuncArgPtrElementType(F, 0); AllArgTypes.push_back(SystemDataTy); NewRetTy = SystemDataTy; break; @@ -1691,10 +1763,32 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, llvm_unreachable("Unhandled ShaderKind"); } + if (MetadataState.isInLgcCpsMode() && + Data.Kind != RayTracingShaderStage::RayGeneration) { + auto *DummyArgTy = StructType::get(*Context, {}); + + if (Data.Kind != RayTracingShaderStage::AnyHit) { + // Add a dummy argument for CpsArgIdxHitAttributes so that the arg index + // of payload matches CpsArgIdxPayload + AllArgTypes.push_back(DummyArgTy); + } + + // Add padding so that payload starts at a fixed dword. + ContHelper::addPaddingType(*DL, *Context, AllArgTypes, + Data.FirstPayloadArgumentDword); + // Place payload at the end + AllArgTypes.push_back(CpsArgPayloadTy); + } + Data.PayloadSpillSize = computeNeededStackSizeForRegisterBuffer( Data.MaxOutgoingPayloadI32s, MetadataState.getMaxPayloadRegisterCount()); assert(Data.PayloadSpillSize == 0 || - Data.Kind != DXILShaderKind::Intersection); + Data.Kind != RayTracingShaderStage::Intersection); + + if (!MetadataState.isInLgcCpsMode()) { + Data.PayloadStorage = PayloadStorageGlobal; + Data.PayloadStorageTy = PayloadStorageGlobal->getValueType(); + } auto *FunctionTypeRetTy = MetadataState.isInLgcCpsMode() ? 
Builder.getVoidTy() : NewRetTy; @@ -1717,15 +1811,42 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, NewFunc->getArg(CpsArgIdxContState)->setName("cont.state"); NewFunc->getArg(CpsArgIdxReturnAddr)->setName("return.addr"); NewFunc->getArg(CpsArgIdxShaderIndex)->setName("shader.index"); + if (Data.Kind != RayTracingShaderStage::RayGeneration) { + NewFunc->getArg(CpsArgIdxSystemData)->setName("system.data"); + NewFunc->getArg(CpsArgIdxHitAttributes)->setName("hit.attrs"); + NewFunc->getArg(CpsArgIdxPadding)->setName("padding"); + NewFunc->getArg(CpsArgIdxPayload)->setName("payload"); + } // Mark as CPS function with the corresponding level. - CpsLevel Level = - getCpsLevelForShaderStage(convertShaderKindToCpsShaderStage(Data.Kind)); + CpsLevel Level = getCpsLevelForShaderStage(Data.Kind); setCpsFunctionLevel(*NewFunc, Level); + + if (Data.Kind == RayTracingShaderStage::Traversal) { + // Compute here means Traversal. + assert(F->arg_size() == 1); + // System data + // NOTE: Pointer address space may not match based on data layout, mutate + // the address space here to keep later GEP valid. + Data.SystemData->mutateType(getWithSamePointeeType( + Data.SystemData->getType(), + F->getArg(0)->getType()->getPointerAddressSpace())); + F->getArg(0)->replaceAllUsesWith(Data.SystemData); + } else { + // Create local payload storage for non-Traversal shader. + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPointPastAllocas(NewFunc); + Data.PayloadStorage = Builder.CreateAlloca(Data.PayloadStorageTy); + Data.PayloadStorage->setName("payload.alloca"); + // TODO: We shouldn't need to create the alloca for RGS. 
+ if (Data.Kind != RayTracingShaderStage::RayGeneration) + Builder.CreateStore(NewFunc->getArg(CpsArgIdxPayload), + Data.PayloadStorage); + } } FunctionEndData EData; - if (Data.Kind == DXILShaderKind::RayGeneration) { + if (Data.Kind == RayTracingShaderStage::RayGeneration) { if (!MetadataState.isInLgcCpsMode()) { NewFunc->setMetadata(ContHelper::MDEntryName, MDTuple::get(*Context, {})); @@ -1742,7 +1863,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, Type *PayloadTy = Data.IncomingPayload; std::optional ShaderStage = - dxilShaderKindToPAQShaderStage(Data.Kind); + rtShaderStageToPAQShaderStage(Data.Kind); PAQSerializationInfoBase *SerializationInfo = Data.IncomingPayloadSerializationInfo; @@ -1754,12 +1875,13 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // recursive TraceRay or CallShader) SmallVector SavedRegisterValues{}; - if (Data.Kind != DXILShaderKind::Intersection) { + if (Data.Kind != RayTracingShaderStage::Intersection && + Data.Kind != RayTracingShaderStage::Traversal) { assert(PayloadTy && "Missing payload type!"); // For AnyHit, the layout depends on whether we accept or ignore, which // we do not know yet. In that case, the layout is determined later. 
- if (Data.Kind != DXILShaderKind::AnyHit) { + if (Data.Kind != RayTracingShaderStage::AnyHit) { OutgoingSerializationLayout = &PAQManager.getOrCreateShaderExitSerializationLayout( *SerializationInfo, Data.Kind, Data.HitAttributes, @@ -1780,36 +1902,31 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, FPayload->replaceAllUsesWith(NewPayload); } - if (MetadataState.isInLgcCpsMode()) { - // TODO Read payload argument for lgc continuations - } else { + auto IncomingRegisterCount = + std::min(IncomingSerializationLayout.NumStorageI32s, + MetadataState.getMaxPayloadRegisterCount()); + MetadataState.updateMaxUsedPayloadRegisterCount(IncomingRegisterCount); + if (!MetadataState.isInLgcCpsMode()) { // Annotate function with the number of registers for incoming payload - auto IncomingRegisterCount = - std::min(IncomingSerializationLayout.NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount()); ContHelper::setIncomingRegisterCount(NewFunc, IncomingRegisterCount); - MetadataState.updateMaxUsedPayloadRegisterCount(IncomingRegisterCount); - - // Copy global payload into local payload at start of shader - if (IncomingSerializationLayout.NumStorageI32s) { - copyPayload(*PayloadTy, NewPayload, ShaderStage, PAQAccessKind::Read, - IncomingSerializationLayout); - // Add barrier so no stores that may overwrite the memory pointer - // are put before the payload is read - Builder.CreateCall(RegisterBufferSetPointerBarrier, {Payload}); - } + } - if (!Data.CallShaderCalls.empty() || !Data.TraceRayCalls.empty()) { - assert(OutgoingSerializationLayout && - "Missing outgoing serialization layout!"); - savePayloadRegistersBeforeRecursion( - Data.Kind, IncomingSerializationLayout, - *OutgoingSerializationLayout, SavedRegisterValues); - } + // Copy global payload into local payload at start of shader + if (IncomingSerializationLayout.NumStorageI32s) { + copyPayload(*PayloadTy, NewPayload, Data.PayloadStorage, ShaderStage, + PAQAccessKind::Read, 
IncomingSerializationLayout); + } + + if (!Data.CallShaderCalls.empty() || !Data.TraceRayCalls.empty()) { + assert(OutgoingSerializationLayout && + "Missing outgoing serialization layout!"); + savePayloadRegistersBeforeRecursion( + Data.PayloadStorage, Data.Kind, IncomingSerializationLayout, + *OutgoingSerializationLayout, SavedRegisterValues); } // Handle hit attributes - if (Data.Kind == DXILShaderKind::AnyHit) { + if (Data.Kind == RayTracingShaderStage::AnyHit) { assert(F->arg_size() == 2 && "Shader has more arguments than expected"); auto *HitAttrs = F->getArg(1); @@ -1831,10 +1948,12 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, &IncomingSerializationLayout); // Copy new hit attributes from argument - Builder.CreateStore(NewFunc->getArg(NewFunc->arg_size() - 1), - HitAttrsAlloca); + unsigned HitAttributesIdx = MetadataState.isInLgcCpsMode() + ? CpsArgIdxHitAttributes + : NewFunc->arg_size() - 1; + Builder.CreateStore(NewFunc->getArg(HitAttributesIdx), HitAttrsAlloca); HitAttrs->replaceAllUsesWith(HitAttrsAlloca); - } else if (Data.Kind == DXILShaderKind::ClosestHit) { + } else if (Data.Kind == RayTracingShaderStage::ClosestHit) { assert(F->arg_size() == 2 && "Shader has more arguments than expected"); auto *OrigHitAttrs = F->getArg(1); @@ -1879,16 +1998,37 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // Modify function ends // While iterating over function ends, basic blocks are inserted by inlining // functions, so we copy them beforehand. 
- SmallVector BBs(make_pointer_range(*NewFunc)); - for (auto *BB : BBs) { - auto *I = BB->getTerminator(); - assert(I && "BB must have terminator"); - // Replace the end of the BB if it terminates the function - bool IsFunctionEnd = (I->getOpcode() == Instruction::Ret || - I->getOpcode() == Instruction::Unreachable); - if (IsFunctionEnd) { - EData.Terminator = I; - processFunctionEnd(Data, EData); + if (MetadataState.isInLgcCpsMode() && + Data.Kind == RayTracingShaderStage::Traversal) { + // Fixup lgc.cps.jump + for (auto *Jump : Data.JumpCalls) { + Builder.SetInsertPoint(Jump); + SmallVector NewTailArgs(Jump->getTail()); + + // Add padding so that payload starts at a fixed dword. + ContHelper::addPaddingValue(*DL, *Context, NewTailArgs, + Data.FirstPayloadArgumentDword); + + // Insert payload into tail args. + NewTailArgs.push_back(NewFunc->getArg(CpsArgIdxPayload)); + + Builder.create(Jump->getTarget(), Jump->getLevels(), + Jump->getState(), NewTailArgs); + Jump->dropAllReferences(); + Jump->eraseFromParent(); + } + } else { + SmallVector BBs(make_pointer_range(*NewFunc)); + for (auto *BB : BBs) { + auto *I = BB->getTerminator(); + assert(I && "BB must have terminator"); + // Replace the end of the BB if it terminates the function + bool IsFunctionEnd = (I->getOpcode() == Instruction::Ret || + I->getOpcode() == Instruction::Unreachable); + if (IsFunctionEnd) { + EData.Terminator = I; + processFunctionEnd(Data, EData); + } } } @@ -1937,7 +2077,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, #ifndef NDEBUG if (!MetadataState.isInLgcCpsMode() && - Data.Kind != DXILShaderKind::RayGeneration) { + Data.Kind != RayTracingShaderStage::RayGeneration) { // Check that all returns have registercount metadata for (const auto &BB : *F) { auto *Terminator = BB.getTerminator(); @@ -1952,9 +2092,8 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, static uint32_t getMaxHitAttributeByteCount(const Function &F) { // Use max hit attribute 
size from metadata, or use globally max allowed // value for the max if metadata is not set - uint32_t Result = lgc::rt::getShaderHitAttributeSize(&F); - if (Result == 0) - Result = GlobalMaxHitAttributeBytes; + auto HitAttributeSizeFromMD = getShaderHitAttributeSize(&F); + uint32_t Result = HitAttributeSizeFromMD.value_or(GlobalMaxHitAttributeBytes); if (Result % RegisterBytes != 0) { auto AlignedSize = alignTo(Result, RegisterBytes); @@ -1967,27 +2106,38 @@ static uint32_t getMaxHitAttributeByteCount(const Function &F) { void LowerRaytracingPipelinePassImpl::collectProcessableFunctions() { for (auto &Func : *Mod) { - auto Stage = lgc::rt::getLgcRtShaderStage(&Func); + auto Stage = getLgcRtShaderStage(&Func); if (!Stage || Func.isDeclaration()) continue; - DXILShaderKind Kind = - ShaderStageHelper::shaderStageToDxilShaderKind(*Stage); + // Skip kernel entry + if (Stage == RayTracingShaderStage::KernelEntry) + continue; + + // Skip Traversal for non-lgc.cps + if (Stage == RayTracingShaderStage::Traversal && + !MetadataState.isInLgcCpsMode()) + continue; + + RayTracingShaderStage Kind = *Stage; switch (Kind) { - case DXILShaderKind::RayGeneration: - case DXILShaderKind::Intersection: - case DXILShaderKind::AnyHit: - case DXILShaderKind::ClosestHit: - case DXILShaderKind::Miss: - case DXILShaderKind::Callable: { + case RayTracingShaderStage::RayGeneration: + case RayTracingShaderStage::Intersection: + case RayTracingShaderStage::AnyHit: + case RayTracingShaderStage::ClosestHit: + case RayTracingShaderStage::Miss: + case RayTracingShaderStage::Callable: + case RayTracingShaderStage::Traversal: { FunctionData Data; Data.Kind = Kind; Data.FuncConfig.MaxHitAttributeBytes = getMaxHitAttributeByteCount(Func); LLVM_DEBUG(dbgs() << "Shader " << Func.getName() << " uses max hit attribute size of " << Data.FuncConfig.MaxHitAttributeBytes << "\n"); - if (Kind != DXILShaderKind::Intersection && - Kind != DXILShaderKind::RayGeneration) { + + if (Kind != 
RayTracingShaderStage::Intersection && + Kind != RayTracingShaderStage::RayGeneration && + Kind != RayTracingShaderStage::Traversal) { assert(!Func.arg_empty() && "Shader must have at least one argument"); Data.IncomingPayload = getFuncArgPtrElementType(&Func, 0); PAQPayloadConfig PAQConfig = {Data.IncomingPayload, @@ -1997,15 +2147,15 @@ void LowerRaytracingPipelinePassImpl::collectProcessableFunctions() { assert(Data.IncomingPayloadSerializationInfo != nullptr && "Missing serialization info!"); } - if (Kind == DXILShaderKind::AnyHit || - Kind == DXILShaderKind::ClosestHit) { + if (Kind == RayTracingShaderStage::AnyHit || + Kind == RayTracingShaderStage::ClosestHit) { assert(Func.arg_size() >= 2 && "Shader must have at least two arguments"); Data.HitAttributes = getFuncArgPtrElementType(&Func, Func.arg_size() - 1); } - if (Kind == DXILShaderKind::Intersection) { + if (Kind == RayTracingShaderStage::Intersection) { Data.MaxOutgoingPayloadI32s = MetadataState.getMaxPayloadRegisterCount(); } @@ -2067,7 +2217,8 @@ void LowerRaytracingPipelinePassImpl::handleUnrematerializableCandidates() { static const llvm_dialects::OpSet NonRematerializableDialectOps = llvm_dialects::OpSet::get(); + ShaderIndexOp, ShaderRecordBufferOp, + JumpOp>(); if (!NonRematerializableDialectOps.contains(Func)) { llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { auto Data = ToProcess.find(CInst.getFunction()); @@ -2119,21 +2270,16 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { GetLocalRootIndex = GpurtLibrary->getFunction(ContDriverFunc::GetLocalRootIndexName); - if (GetLocalRootIndex) - assert( - GetLocalRootIndex->getReturnType() == - Type::getInt32Ty(Mod->getContext()) && - GetLocalRootIndex->arg_size() == 1 - // Dispatch data - && - GetLocalRootIndex->getFunctionType()->getParamType(0)->isPointerTy()); - SetLocalRootIndex = getSetLocalRootIndex(*Mod); + assert(GetLocalRootIndex && "Could not find GetLocalRootIndex function"); + assert(GetLocalRootIndex->getReturnType() 
== + Type::getInt32Ty(Mod->getContext()) && + GetLocalRootIndex->arg_size() == 1 + // Dispatch data + && + GetLocalRootIndex->getFunctionType()->getParamType(0)->isPointerTy()); - SetupRayGen = GpurtLibrary->getFunction(ContDriverFunc::SetupRayGenName); - if (SetupRayGen) - assert(SetupRayGen->getReturnType()->isStructTy() && - SetupRayGen->arg_empty()); + SetLocalRootIndex = getSetLocalRootIndex(*Mod); TraceRay = GpurtLibrary->getFunction(ContDriverFunc::TraceRayName); if (TraceRay) @@ -2186,6 +2332,10 @@ LowerRaytracingPipelinePassImpl::LowerRaytracingPipelinePassImpl( bool LowerRaytracingPipelinePassImpl::run() { collectGpuRtFunctions(); + DispatchSystemDataTy = getFuncArgPtrElementType(GetLocalRootIndex, 0); + assert(DispatchSystemDataTy && "LowerRaytracingPipelinePassImpl::run: Could " + "not derive DispatchSystemData " + "type from GetLocalRootIndex!"); collectProcessableFunctions(); @@ -2198,8 +2348,8 @@ bool LowerRaytracingPipelinePassImpl::run() { llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByInstruction) .addSet([](VisitorState &State, - Instruction &Op) { + ShaderRecordBufferOp, JumpOp>([](VisitorState &State, + Instruction &Op) { auto *CInst = cast(&Op); auto Data = State.Processables.find(CInst->getFunction()); if (Data == State.Processables.end()) @@ -2215,6 +2365,11 @@ bool LowerRaytracingPipelinePassImpl::run() { return; } + if (auto *Jump = dyn_cast(CInst)) { + Data->second.JumpCalls.push_back(Jump); + return; + } + Type *PayloadTy = ContHelper::getPayloadTypeFromMetadata(*CInst); if (!isa(Op)) { @@ -2305,20 +2460,20 @@ bool LowerRaytracingPipelinePassImpl::run() { } std::optional -llvm::dxilShaderKindToPAQShaderStage(DXILShaderKind ShaderKind) { +llvm::rtShaderStageToPAQShaderStage(RayTracingShaderStage ShaderKind) { switch (ShaderKind) { - case DXILShaderKind::RayGeneration: + case RayTracingShaderStage::RayGeneration: return PAQShaderStage::Caller; - case DXILShaderKind::Intersection: + case 
RayTracingShaderStage::Intersection: // Explicit: PAQ do not apply to Intersection return {}; - case DXILShaderKind::AnyHit: + case RayTracingShaderStage::AnyHit: return PAQShaderStage::AnyHit; - case DXILShaderKind::ClosestHit: + case RayTracingShaderStage::ClosestHit: return PAQShaderStage::ClosestHit; - case DXILShaderKind::Miss: + case RayTracingShaderStage::Miss: return PAQShaderStage::Miss; - case DXILShaderKind::Callable: + case RayTracingShaderStage::Callable: // Explicit: PAQ do not apply to Callable return {}; default: diff --git a/llvmraytracing/lib/PassRegistry.inc b/llvmraytracing/lib/PassRegistry.inc index 8d3220bcf9..36e61575c8 100644 --- a/llvmraytracing/lib/PassRegistry.inc +++ b/llvmraytracing/lib/PassRegistry.inc @@ -49,6 +49,7 @@ CONT_MODULE_ANALYSIS("dialect-context-analysis", CONT_MODULE_PASS("legacy-cleanup-continuations", LegacyCleanupContinuationsPass()) +CONT_MODULE_PASS("dxil-cleanup-continuations", DXILCleanupContinuationsPass()) CONT_MODULE_PASS("cleanup-continuations", CleanupContinuationsPass()) CONT_MODULE_PASS("lower-raytracing-pipeline", LowerRaytracingPipelinePass()) CONT_MODULE_PASS("dxil-cont-intrinsic-prepare", DXILContIntrinsicPreparePass()) diff --git a/llvmraytracing/lib/PayloadAccessQualifiers.cpp b/llvmraytracing/lib/PayloadAccessQualifiers.cpp index 53117482e6..7f7729089c 100644 --- a/llvmraytracing/lib/PayloadAccessQualifiers.cpp +++ b/llvmraytracing/lib/PayloadAccessQualifiers.cpp @@ -1863,19 +1863,20 @@ PAQSerializationInfoManager::PAQSerializationInfoManager( PAQSerializationInfoBase & PAQSerializationInfoManager::getOrCreateSerializationInfo( - const PAQPayloadConfig &PayloadConfig, DXILShaderKind ShaderKind) { + const PAQPayloadConfig &PayloadConfig, + lgc::rt::RayTracingShaderStage ShaderKind) { switch (ShaderKind) { - case DXILShaderKind::RayGeneration: + case lgc::rt::RayTracingShaderStage::RayGeneration: llvm_unreachable("RayGen does not have an incoming payload"); - case DXILShaderKind::Intersection: - case 
DXILShaderKind::AnyHit: - case DXILShaderKind::ClosestHit: - case DXILShaderKind::Miss: + case lgc::rt::RayTracingShaderStage::Intersection: + case lgc::rt::RayTracingShaderStage::AnyHit: + case lgc::rt::RayTracingShaderStage::ClosestHit: + case lgc::rt::RayTracingShaderStage::Miss: return getOrCreateTraceRaySerializationInfo(PayloadConfig); - case DXILShaderKind::Callable: + case lgc::rt::RayTracingShaderStage::Callable: return getOrCreateCallShaderSerializationInfo(PayloadConfig); default: - llvm_unreachable("Unexpected DXILShaderKind"); + llvm_unreachable("Unexpected lgc::rt::RayTracingShaderStage"); } } @@ -2003,18 +2004,19 @@ PAQSerializationInfoManager::getOrCreateTraceRayLayout( const PAQSerializationLayout & PAQSerializationInfoManager::getOrCreateShaderStartSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, DXILShaderKind ShaderKind, - Type *HitAttributesTy) { + PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy) { - assert(ShaderKind != DXILShaderKind::RayGeneration && - ShaderKind != DXILShaderKind::Intersection && "Invalid shader kind!"); - if (ShaderKind == DXILShaderKind::Callable) + assert(ShaderKind != lgc::rt::RayTracingShaderStage::RayGeneration && + ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && + "Invalid shader kind!"); + if (ShaderKind == lgc::rt::RayTracingShaderStage::Callable) return cast(SerializationInfo) .CallShaderSerializationLayout; // Always set for non-intersection PAQShaderStage ShaderStage = - dxilShaderKindToPAQShaderStage(ShaderKind).value(); + rtShaderStageToPAQShaderStage(ShaderKind).value(); // Always set for non-caller, non-intersection read access PAQSerializationLayoutKind LayoutKind = tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Read).value(); @@ -2025,17 +2027,19 @@ PAQSerializationInfoManager::getOrCreateShaderStartSerializationLayout( const PAQSerializationLayout & 
PAQSerializationInfoManager::getOrCreateShaderExitSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, DXILShaderKind ShaderKind, - Type *HitAttributesTy, AnyHitExitKind AHExitKind) { - - assert(ShaderKind != DXILShaderKind::RayGeneration && - ShaderKind != DXILShaderKind::Intersection && "Invalid shader kind!"); - if (ShaderKind == DXILShaderKind::Callable) + PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy, + AnyHitExitKind AHExitKind) { + + assert(ShaderKind != lgc::rt::RayTracingShaderStage::RayGeneration && + ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && + "Invalid shader kind!"); + if (ShaderKind == lgc::rt::RayTracingShaderStage::Callable) return cast(SerializationInfo) .CallShaderSerializationLayout; PAQShaderStage ShaderStage = - dxilShaderKindToPAQShaderStage(ShaderKind).value(); + rtShaderStageToPAQShaderStage(ShaderKind).value(); std::optional OptLayoutKind = tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Write); if (!OptLayoutKind) { diff --git a/llvmraytracing/lib/RegisterBuffer.cpp b/llvmraytracing/lib/RegisterBuffer.cpp index 23dfaaa867..c69c5d53eb 100644 --- a/llvmraytracing/lib/RegisterBuffer.cpp +++ b/llvmraytracing/lib/RegisterBuffer.cpp @@ -64,8 +64,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include diff --git a/llvmraytracing/lib/RemoveTypesMetadata.cpp b/llvmraytracing/lib/RemoveTypesMetadata.cpp index fcb3d47881..f45803e3d1 100644 --- a/llvmraytracing/lib/RemoveTypesMetadata.cpp +++ b/llvmraytracing/lib/RemoveTypesMetadata.cpp @@ -30,12 +30,8 @@ //===----------------------------------------------------------------------===// #include "llvmraytracing/Continuations.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" 
#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/InitializePasses.h" #include using namespace llvm; diff --git a/llvmraytracing/test/dx/cleanup-continuations-malloc.ll b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll index 06ac764f00..29b8085b78 100644 --- a/llvmraytracing/test/dx/cleanup-continuations-malloc.ll +++ b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll @@ -13,12 +13,11 @@ define <4 x i32> @simple_await(<4 x i32> %arg) !continuation.registercount !1 { ; CHECK-LABEL: define void @simple_await( ; CHECK-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CHECK-NEXT: unreachable ; @@ -31,12 +30,11 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; CHECK-LABEL: define void @simple_await_entry( ; CHECK-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META1]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CHECK-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CHECK-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/cleanup-continuations.ll b/llvmraytracing/test/dx/cleanup-continuations.ll index 3a3a9891ce..a5372bc86c 100644 --- a/llvmraytracing/test/dx/cleanup-continuations.ll +++ b/llvmraytracing/test/dx/cleanup-continuations.ll @@ -17,11 +17,10 @@ define { i8*, %continuation.token* } @simple_await(i8* %0) !continuation !0 !con ; CHECK-LABEL: define void @simple_await( ; CHECK-SAME: ) !continuation [[META1:![0-9]+]] !continuation.registercount [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 -; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; CHECK-NEXT: unreachable ; @@ -38,12 +37,12 @@ define internal { i8*, %continuation.token* } @simple_await.resume.0(i8* noalias ; CHECK-LABEL: define dso_local void @simple_await.resume.0( ; CHECK-SAME: ) !continuation [[META1]] !continuation.registercount [[META2]] { ; CHECK-NEXT: entryresume.0: -; CHECK-NEXT: call void @lgc.cps.free(i32 8) -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) ; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CHECK-NEXT: call void @lgc.cps.free(i32 8) ; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[DOTRELOAD]]), !continuation.registercount [[META2]] ; CHECK-NEXT: unreachable ; @@ -60,9 +59,8 @@ define { i8*, %continuation.token* } @simple_await_entry(i8* %0) !continuation.e ; CHECK-LABEL: define void @simple_await_entry( ; CHECK-SAME: ) !continuation [[META4:![0-9]+]] !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) -; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; CHECK-NEXT: unreachable ; @@ -79,10 +77,10 @@ define internal { i8*, %continuation.token* } @simple_await_entry.resume.0(i8* n ; CHECK-LABEL: define dso_local void @simple_await_entry.resume.0( ; CHECK-SAME: ) !continuation [[META4]] !continuation.registercount [[META2]] { ; CHECK-NEXT: entryresume.0: -; CHECK-NEXT: call void @lgc.cps.free(i32 8) -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP0]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) +; CHECK-NEXT: call 
void @lgc.cps.free(i32 8) ; CHECK-NEXT: ret void ; CHECK: entryresume.0.split: ; CHECK-NEXT: unreachable @@ -97,11 +95,10 @@ entryresume.0: define { i8*, %continuation.token* } @await_with_ret_value(i8* %0) !continuation !1 !continuation.registercount !4 { ; CHECK-LABEL: define void @await_with_ret_value( ; CHECK-SAME: ) !continuation [[META6:![0-9]+]] !continuation.registercount [[META2]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP1]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 -; CHECK-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; CHECK-NEXT: unreachable ; @@ -116,12 +113,12 @@ define { i8*, %continuation.token* } @await_with_ret_value(i8* %0) !continuation define internal { i8*, %continuation.token* } @await_with_ret_value.resume.0(i8* noalias nonnull align 16 dereferenceable(8) %0, i1 %1) !continuation !1 { ; CHECK-LABEL: define dso_local void @await_with_ret_value.resume.0( ; CHECK-SAME: i32 [[RES1:%.*]]) !continuation [[META6]] !continuation.registercount [[META2]] { -; CHECK-NEXT: call void @lgc.cps.free(i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[TMP1]] to ptr addrspace(32) +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[VFRAME:%.*]] = bitcast ptr addrspace(32) [[FRAMEPTR]] to ptr addrspace(32) ; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CHECK-NEXT: call void @lgc.cps.free(i32 8) ; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i32 [[RES1]]), !continuation.registercount [[META2]] ; CHECK-NEXT: unreachable ; @@ -147,8 +144,8 @@ attributes #0 = { nounwind } ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: read) } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: readwrite) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: readwrite) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: read) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 21} ; CHECK: [[META1]] = !{ptr @simple_await} diff --git a/llvmraytracing/test/dx/closest-hit-procedural.ll b/llvmraytracing/test/dx/closest-hit-procedural.ll index 08024ba105..80b5bca1c8 100644 --- a/llvmraytracing/test/dx/closest-hit-procedural.ll +++ b/llvmraytracing/test/dx/closest-hit-procedural.ll @@ -119,7 +119,7 @@ declare !types !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %s ; Function Attrs: nounwind define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.HitAttributes* nocapture readonly %attr) #3 !types !36 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.registercount [[META21:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -127,60 +127,50 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture 
%payload, %struct. ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr addrspace(20) @PAYLOAD, i32 0, i32 0, i32 1), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspace(20) @PAYLOAD, i32 0, i32 0, i32 2), align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP27]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP31]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP31]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP38]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP39]], !continuation.registercount [[META21]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP39]], !continuation.registercount [[META16]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.registercount [[META19:![0-9]+]] !continuation.state [[META14:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] !continuation.state [[META14:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: 
AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 @@ -190,28 +180,28 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = 
load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP6]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_02_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_02_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_02_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_02_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 2) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr addrspace(20) @REGISTERS, i32 0, i32 0, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspace(20) @REGISTERS, i32 0, i32 0, i32 2), align 4 ; DXILCONTPOSTPROCESS-NEXT: 
call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP11]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call 
void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META19]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ret void diff --git a/llvmraytracing/test/dx/closest-hit-traceray.ll b/llvmraytracing/test/dx/closest-hit-traceray.ll index 5904ac8f1f..683eb79e8f 100644 --- a/llvmraytracing/test/dx/closest-hit-traceray.ll +++ b/llvmraytracing/test/dx/closest-hit-traceray.ll @@ -117,7 +117,7 @@ declare !types !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %s ; Function Attrs: nounwind define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !36 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -125,29 +125,23 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. 
; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -155,40 +149,36 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = alloca [[STRUCT_RAYPAYLOAD]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = bitcast ptr [[TMP25]] to ptr -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP26]]) #[[ATTR10:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP26]]) #[[ATTR9:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP25]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP23]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP28]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP28]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP29]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP31]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @amd.dx.Traversal([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AMD_DX_TRAVERSAL:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[NEWDATA_I]], ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP32]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP33]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP32]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[TMP36]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP36]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP36]], i64 2 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP44]], !continuation.registercount [[META20]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP44]], !continuation.registercount [[META16]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !continuation [[META17:![0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation.registercount [[META19:![0-9]+]] !continuation.state [[META14:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] !continuation.state [[META14:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 @@ -198,10 +188,10 @@ define void 
@ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] 
[[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP6]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 @@ -210,8 +200,8 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP11]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP11]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP12]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: 
[[DIS_DATA_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP14]], i32 0, i32 0 @@ -219,20 +209,20 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DIS_DATA_I_FCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @amd.dx.Traversal([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) +; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AMD_DX_TRAVERSAL:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) ; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[NEWDATA_I]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP14]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[NEWDATA_I_FCA_0_EXTRACT]], ptr [[NEWDATA_I_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to 
ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP15]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META19]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 diff --git a/llvmraytracing/test/dx/closest-hit.ll b/llvmraytracing/test/dx/closest-hit.ll index a373d279c6..8f9291ee73 100644 --- a/llvmraytracing/test/dx/closest-hit.ll +++ b/llvmraytracing/test/dx/closest-hit.ll @@ -104,7 +104,7 @@ declare !types !29 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %s ; Function Attrs: nounwind define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !30 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] !continuation.registercount [[META10:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -112,23 +112,17 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. 
; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -136,19 +130,15 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; LOWERRAYTRACINGPIPELINE-NEXT: [[BARYPTR:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[BARY:%.*]] = load <2 x float>, ptr [[BARYPTR]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store <2 x float> [[BARY]], ptr [[PTR]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP19]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]], !continuation.registercount [[META15]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]], !continuation.registercount [[META10]] ; %ptr = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 %baryPtr = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 diff --git a/llvmraytracing/test/dx/continuation-state.ll b/llvmraytracing/test/dx/continuation-state.ll index 25711f37c8..014ea354ba 100644 --- a/llvmraytracing/test/dx/continuation-state.ll +++ b/llvmraytracing/test/dx/continuation-state.ll @@ -38,12 +38,11 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; CLEANUP-LABEL: define void @simple_await( ; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.stacksize [[META4:![0-9]+]] !continuation.state [[META4]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: store <4 x i32> 
[[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; CLEANUP-NEXT: unreachable ; @@ -51,12 +50,12 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; CLEANUP-LABEL: define dso_local void @simple_await.resume.0( ; CLEANUP-SAME: ) !continuation.registercount [[META2]] !continuation [[META3]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) +; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: 
[[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) ; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] ; CLEANUP-NEXT: unreachable ; @@ -64,12 +63,11 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; CLEANUP-LABEL: define void @simple_await_entry( ; CLEANUP-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META4]] !continuation.state [[META4]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; CLEANUP-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; CLEANUP-NEXT: unreachable ; @@ -77,13 +75,13 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; CLEANUP-LABEL: define dso_local void @simple_await_entry.resume.0( ; CLEANUP-SAME: ) !continuation.registercount [[META2]] !continuation [[META6]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) -; CLEANUP-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) +; CLEANUP-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(32) [[MEM_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANUP-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 ; CLEANUP-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 +; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) ; CLEANUP-NEXT: ret void ; CLEANUP: entryresume.0.split: ; CLEANUP-NEXT: unreachable @@ -92,12 +90,11 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; REGISTERBUFFER-LABEL: define void @simple_await( ; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) 
!continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.stacksize [[META4:![0-9]+]] !continuation.state [[META4]] { ; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; REGISTERBUFFER-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; REGISTERBUFFER-NEXT: unreachable ; @@ -105,12 +102,12 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; REGISTERBUFFER-LABEL: define dso_local void @simple_await.resume.0( ; REGISTERBUFFER-SAME: ) !continuation.registercount [[META2]] !continuation [[META3]] { ; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) +; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] ; REGISTERBUFFER-NEXT: unreachable ; @@ -118,12 +115,11 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; REGISTERBUFFER-LABEL: define void @simple_await_entry( ; REGISTERBUFFER-SAME: <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META4]] !continuation.state [[META4]] { ; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) +; REGISTERBUFFER-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; REGISTERBUFFER-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await_entry.resume.0 to i64)), !continuation.registercount [[META2]], !continuation.returnedRegistercount !2 ; REGISTERBUFFER-NEXT: unreachable ; @@ -131,13 +127,13 @@ define void @simple_await_entry(<4 x i32> %arg, <4 x i32> addrspace(1)* %mem) !c ; REGISTERBUFFER-LABEL: define dso_local void @simple_await_entry.resume.0( ; REGISTERBUFFER-SAME: ) !continuation.registercount [[META2]] !continuation [[META6]] { ; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) +; REGISTERBUFFER-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; REGISTERBUFFER-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(32) [[MEM_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 ; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) ; REGISTERBUFFER-NEXT: ret void ; REGISTERBUFFER: entryresume.0.split: ; REGISTERBUFFER-NEXT: unreachable diff --git a/llvmraytracing/test/dx/continuation-without-await.ll 
b/llvmraytracing/test/dx/continuation-without-await.ll index 93e1e702a2..4e4bb95f7b 100644 --- a/llvmraytracing/test/dx/continuation-without-await.ll +++ b/llvmraytracing/test/dx/continuation-without-await.ll @@ -130,7 +130,7 @@ attributes #2 = { nounwind } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 @@ -138,59 +138,53 @@ attributes #2 = { nounwind } ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] zeroinitializer, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [1 x i32], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META22:![0-9]+]], !continuation.returnedRegistercount !22 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr 
[[TMP4]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP6]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META17:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META16:![0-9]+]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main_no_call( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.entry [[META20]] !continuation.registercount [[META9]] 
!continuation [[META23:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META17]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META16]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META25:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = 
getelementptr i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP4]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP12]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], !continuation.registercount [[META18]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], !continuation.registercount [[META17]] ; ; ; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( @@ -199,22 +193,20 @@ attributes #2 = { nounwind } ; ; ; CLEANUP-LABEL: define void @main( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META22:![0-9]+]], !continuation.returnedRegistercount !22 +; CLEANUP-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META22]] !continuation [[META21]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP1:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: ret void @@ -223,7 +215,7 @@ attributes #2 = { nounwind } ; ; ; CLEANUP-LABEL: define void @main_no_call( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.entry [[META20]] !continuation.registercount [[META9]] !continuation [[META23:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -233,20 +225,18 @@ attributes #2 = { nounwind } ; ; ; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) 
!lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META25:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP1]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 +; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 [[TMP2]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; @@ -261,9 +251,7 @@ attributes #2 = { nounwind } ; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; REGISTERBUFFER-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 ; REGISTERBUFFER-NEXT: unreachable ; @@ -272,8 +260,6 @@ attributes #2 = { nounwind } ; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { ; REGISTERBUFFER-NEXT: entryresume.0: ; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP1]], 0 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 ; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; REGISTERBUFFER-NEXT: ret void @@ -296,14 +282,12 @@ attributes #2 = { nounwind } ; REGISTERBUFFER-NEXT: AllocaSpillBB: ; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) ; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 1) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i64 2) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; REGISTERBUFFER-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; REGISTERBUFFER-NEXT: unreachable @@ -320,13 +304,11 @@ attributes #2 = { nounwind } ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; POSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; POSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: store i32 0, ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) ; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP1]], i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount !21 @@ -339,8 +321,6 @@ attributes #2 = { nounwind } ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP1]], 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-NEXT: ret void @@ -354,7 +334,7 @@ attributes #2 = { nounwind } ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; POSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-NEXT: ret void @@ -369,12 +349,12 @@ attributes #2 = { nounwind } ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS 
to ptr), i64 1) to ptr addrspace(20)), align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 2) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 1) to ptr addrspace(20)), align 4 -; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i32, ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i64 2) to ptr addrspace(20)), align 4 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP4]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] diff --git a/llvmraytracing/test/dx/dxil-cont-post-process.ll b/llvmraytracing/test/dx/dxil-cont-post-process.ll index b767a069e6..2bd1e5d275 100644 --- a/llvmraytracing/test/dx/dxil-cont-post-process.ll +++ b/llvmraytracing/test/dx/dxil-cont-post-process.ll @@ -9,7 +9,6 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @debug_global = external global i1 declare void @_AmdComplete() #0 -declare i32 @continuation.initialContinuationStackPtr() declare i32 @_cont_GetContinuationStackAddr() declare i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) diff --git a/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll index 55317479f3..dff10ffd45 100644 --- a/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll +++ b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll @@ -24,6 +24,8 @@ declare !types !2 i32 @"\01?_AmdValueGetI32Something@@YA_KXZ"(%struct.TraversalD declare !types !3 void @"\01?_AmdValueSetI32Something@@YA_KXZ"(%struct.TraversalData*, i32, i32) +declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + ; Function Attrs: nounwind define void @_cont_Traversal(i32 %stackPtr, %struct.TraversalData* %data) #0 !types !4 { %1 = getelementptr inbounds %struct.TraversalData, %struct.TraversalData* %data, i32 0, i32 1 @@ -72,6 +74,8 @@ attributes #2 = { nounwind } !4 = !{!"function", !"void", i32 poison, !1} !5 = !{!"function", !"void", i64 poison, i64 poison, i32 poison, !6} !6 = !{i32 0, %struct.SystemData poison} +!7 = !{i32 0, %struct.DispatchSystemData poison} +!8 = !{!"function", i32 poison, !7} ; PREPARE-LABEL: define void @_cont_Traversal( ; PREPARE-SAME: i32 [[STACKPTR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[DATA:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META0:![0-9]+]] { ; PREPARE-NEXT: 
[[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 @@ -96,11 +100,11 @@ attributes #2 = { nounwind } ; PREPARE-NEXT: [[ADDR:%.*]] = zext i32 [[A4]] to i64 ; PREPARE-NEXT: [[TMP7:%.*]] = load [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP5]], align 4 ; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[STACKPTR]], i64 ptrtoint (ptr @_cont_Traversal to i64), [[STRUCT_SYSTEMDATA]] [[TMP7]]) -; PREPARE-NEXT: br label [[TMP10:%.*]] +; PREPARE-NEXT: unreachable ; PREPARE: 8: ; PREPARE-NEXT: [[TMP9:%.*]] = load [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], align 4 ; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 0, i64 -1, i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[TMP9]]) -; PREPARE-NEXT: br label [[TMP10]] +; PREPARE-NEXT: unreachable ; PREPARE: 10: ; PREPARE-NEXT: ret void ; @@ -146,7 +150,7 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 ; ALL-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @_cont_Traversal to i64)) ; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[TMP11]], i32 [[STACKPTR]], i64 [[TMP12]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META0]] -; ALL-NEXT: br label [[TMP15:%.*]] +; ALL-NEXT: unreachable ; ALL: 13: ; ALL-NEXT: [[DOTFCA_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 0, i32 0 ; ALL-NEXT: [[DOTFCA_0_0_LOAD2:%.*]] = load i32, ptr [[DOTFCA_0_0_GEP1]], align 4 @@ -156,7 +160,7 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[DOTFCA_1_INSERT6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT3]], float [[DOTFCA_1_LOAD5]], 1 ; ALL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 ; ALL-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 0, i64 -1, i32 [[TMP14]], i32 [[STACKPTR]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT6]]), !continuation.registercount [[META0]] -; ALL-NEXT: br label [[TMP15]] +; ALL-NEXT: unreachable ; ALL: 15: ; ALL-NEXT: ret void ; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll index 814981a812..d449f86723 100644 --- a/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll @@ -45,8 +45,8 @@ AllocaSpillBB: store i32 99, ptr addrspace(32) %ptr %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 - %3 = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) - call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i32 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + %3 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) + call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 unreachable } @@ -107,7 +107,7 @@ declare void @continuation.return(...) #3 declare ptr addrspace(32) @lgc.cps.alloc(i32) #5 ; Function Attrs: nounwind willreturn -declare i32 @lgc.cps.as.continuation.reference(...) #2 +declare i64 @lgc.cps.as.continuation.reference__i64(...) #2 ; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) declare ptr addrspace(32) @lgc.cps.peek(i32) #6 @@ -115,6 +115,8 @@ declare ptr addrspace(32) @lgc.cps.peek(i32) #6 ; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) declare void @lgc.cps.free(i32) #5 +declare void @continuation.continue(i64, ...) 
+ attributes #0 = { nofree nounwind willreturn } attributes #1 = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } attributes #2 = { nounwind willreturn } @@ -175,8 +177,9 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(22) [[TMP8]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 -; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) -; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, [[STRUCT_TYPE]] [[CONT_STATE]], i32 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; ; @@ -200,6 +203,8 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, [[STRUCT_TYPE]] [[TMP0]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP11:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP11]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll index 9f0d0f79cb..b0ec32bdb7 100644 --- a/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll @@ -45,8 +45,8 @@ AllocaSpillBB: store i32 99, ptr addrspace(32) %ptr %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 - %3 = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) - call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i32 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + %3 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) + call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 unreachable } @@ -107,7 +107,7 @@ declare void @continuation.return(...) #3 declare ptr addrspace(32) @lgc.cps.alloc(i32) #5 ; Function Attrs: nounwind willreturn -declare i32 @lgc.cps.as.continuation.reference(...) #2 +declare i64 @lgc.cps.as.continuation.reference__i64(...) 
#2 ; Function Attrs: nounwind willreturn memory(inaccessiblemem: read) declare ptr addrspace(32) @lgc.cps.peek(i32) #6 @@ -175,8 +175,9 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(21) [[TMP8]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 -; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) -; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, [[STRUCT_TYPE]] [[CONT_STATE]], i32 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; ; @@ -199,6 +200,8 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, [[STRUCT_TYPE]] [[TMP0]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP10]], i32 [[TMP11]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/global-mem-stack.ll b/llvmraytracing/test/dx/global-mem-stack.ll index ece34abb60..9dfec793e6 100644 --- a/llvmraytracing/test/dx/global-mem-stack.ll +++ b/llvmraytracing/test/dx/global-mem-stack.ll @@ -151,82 +151,68 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, ; CHECK-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CHECK-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 28 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(22) [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(22) [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 
[[TMP13]] to float -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 36 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(22) [[TMP18]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float -; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP20]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], -8 -; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 40 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(22) [[TMP24]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float -; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP26]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 20 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(22) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(22) [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float +; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float 
[[TMP11]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP4]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(22) [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float +; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP15]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP4]], 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(22) [[TMP17]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32 [[TMP18]] to float +; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP19]], i32 3 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; CHECK-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP28]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32 [[TMP20]] to float +; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP21]], i32 0 ; CHECK-NEXT: [[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = bitcast 
i32 [[TMP29]] to float -; CHECK-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP30]], i32 1 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32 [[TMP22]] to float +; CHECK-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP23]], i32 1 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float 1.000000e+00, [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; CHECK-NEXT: [[TMP34:%.*]] = fsub fast float [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x float> undef, float [[TMP34]], i64 0 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP31]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP33]], i64 2 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x float> [[TMP37]], float 1.000000e+00, i64 3 -; CHECK-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -8 -; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 28 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] -; CHECK-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP43]], align 4 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 1 -; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP46:%.*]] = 
add i32 [[TMP45]], -8 -; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 32 -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] -; CHECK-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP48]], align 4 -; CHECK-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 2 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -8 -; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 36 -; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] -; CHECK-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP53]], align 4 -; CHECK-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP38]], i32 3 -; CHECK-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -8 -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 40 -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] -; CHECK-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP58]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = fsub fast float 1.000000e+00, [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; CHECK-NEXT: [[TMP27:%.*]] = fsub fast float [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> undef, float [[TMP27]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP24]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP26]], i64 2 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float 1.000000e+00, i64 3 +; CHECK-NEXT: 
[[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], 20 +; CHECK-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP31]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP33]] +; CHECK-NEXT: store i32 [[TMP34]], ptr addrspace(22) [[TMP35]], align 4 +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP33]], 4 +; CHECK-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP31]], i32 1 +; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] +; CHECK-NEXT: store i32 [[TMP37]], ptr addrspace(22) [[TMP38]], align 4 +; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP33]], 8 +; CHECK-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP31]], i32 2 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP39]] +; CHECK-NEXT: store i32 [[TMP40]], ptr addrspace(22) [[TMP41]], align 4 +; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP33]], 12 +; CHECK-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP31]], i32 3 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] +; CHECK-NEXT: store i32 [[TMP43]], ptr addrspace(22) [[TMP44]], align 4 ; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; CHECK-NEXT: [[TMP59:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP59]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META8]] +; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP45]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META8]] ; CHECK-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll b/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll index 7cd758b96c..16407ac36d 100644 --- a/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll +++ b/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll @@ -1,9 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s 2> %t.stderr | FileCheck %s -; RUN: count 0 < %t.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='lower-raytracing-pipeline,lint' -S 2> %t0.stderr | FileCheck --check-prefix=CHECK %s +; RUN: count 0 < %t0.stderr +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s 2> %t1.stderr | FileCheck --check-prefix=CHECK-CPS %s +; RUN: count 0 < %t1.stderr + +%struct.DispatchSystemData = type { i32 } declare void @Use(i64) declare i64 @_AmdGetCurrentFuncAddr() +declare !types !2 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define void @MyRayGen() { ; CHECK-LABEL: define void @MyRayGen() { @@ -11,6 +16,12 @@ define void @MyRayGen() { ; CHECK-NEXT: call void @Use(i64 ptrtoint (ptr @MyRayGen to i64)) ; CHECK-NEXT: ret void ; +; CHECK-CPS-LABEL: define void @MyRayGen() { +; CHECK-CPS-NEXT: AllocaSpillBB: +; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @MyRayGen) +; CHECK-CPS-NEXT: call void @Use(i64 [[TMP0]]) +; CHECK-CPS-NEXT: ret void +; AllocaSpillBB: %val = call i64 @_AmdGetCurrentFuncAddr() call void @Use(i64 %val) @@ -23,9 +34,18 @@ define void @MyRayGen.resume.0() { ; CHECK-NEXT: call void @Use(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) ; CHECK-NEXT: ret void ; +; CHECK-CPS-LABEL: define void @MyRayGen.resume.0() { +; CHECK-CPS-NEXT: entryresume.0: +; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) +; CHECK-CPS-NEXT: call void @Use(i64 [[TMP0]]) +; CHECK-CPS-NEXT: ret void +; entryresume.0: %val = call i64 @_AmdGetCurrentFuncAddr() call void @Use(i64 %val) ret void } +!lgc.cps.module = !{} +!1 = !{i32 0, %struct.DispatchSystemData poison} +!2 = !{!"function", i32 poison, !1} diff --git a/llvmraytracing/test/dx/intrinsics/get-setting.ll b/llvmraytracing/test/dx/intrinsics/get-setting.ll new file mode 100644 index 0000000000..f6a2433fc4 --- /dev/null +++ b/llvmraytracing/test/dx/intrinsics/get-setting.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint' -S %s 2> %t.stderr | FileCheck %s +; RUN: count 0 < %t.stderr + +declare i32 @_AmdGetSetting_123() + +@debug_global = external global i32 + +define void @main() !lgc.rt.shaderstage !1 { +; CHECK-LABEL: define void @main( +; CHECK-SAME: ) !lgc.rt.shaderstage [[META1:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 42, ptr @debug_global, align 4 +; CHECK-NEXT: ret void +; +entry: + %val = call i32 @_AmdGetSetting_123() + store i32 %val, ptr @debug_global + ret void +} + +!gpurt.settings = !{!2} + +!0 = !{i32 3} +!1 = !{i32 0} +!2 = !{i64 123, i64 42} diff --git a/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll index f974ca77a1..d5b25bae75 100644 --- 
a/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll +++ b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll @@ -29,24 +29,22 @@ define float @_cont_RayTCurrent() { ; Note: DXILShaderKind::Miss has value 11 define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 { ; CHECK-LABEL: define %struct.DispatchSystemData @MyMiss -; CHECK-SAME: ([[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META12:![0-9]+]] !continuation.registercount [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] { +; CHECK-SAME: ([[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META12:![0-9]+]] !continuation.registercount [[META5:![0-9]+]] !continuation [[META13:![0-9]+]] { ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[SYSTEM_DATA_ALLOCA]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 -; CHECK-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: store i32 11, ptr [[TMP6]], align 4 -; CHECK-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: store i32 [[TMP8]], ptr @PAYLOAD, align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(20) @PAYLOAD, align 4 ; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], !continuation.registercount [[META13]] +; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], !continuation.registercount [[META5]] ; %1 = call i32 @_AmdGetShaderKind() %2 = getelementptr inbounds %struct.Payload, %struct.Payload* %payload, i32 0, i32 0 diff --git a/llvmraytracing/test/dx/intrinsics/shader-index.ll b/llvmraytracing/test/dx/intrinsics/shader-index.ll index e80987b244..0ac86c8c75 100644 --- a/llvmraytracing/test/dx/intrinsics/shader-index.ll +++ b/llvmraytracing/test/dx/intrinsics/shader-index.ll @@ -19,9 +19,10 @@ define i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 %hit define void @main() !lgc.rt.shaderstage !24 { ; CHECK-LABEL: define void @main( -; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META11:![0-9]+]] !lgc.cps [[META11]] !continuation [[META14:![0-9]+]] { +; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META13:![0-9]+]] !lgc.cps [[META13]] !continuation [[META14:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; 
CHECK-NEXT: store i32 0, ptr @debug_global, align 4 @@ -35,19 +36,24 @@ entry: define void @callable(%struct.Payload* %payload) !types !22 !lgc.rt.shaderstage !25 { ; CHECK-LABEL: define void @callable( -; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META10:![0-9]+]] !continuation [[META16:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 -; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 +; CHECK-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; CHECK-NEXT: store i32 [[SHADER_INDEX]], ptr @debug_global, align 4 -; CHECK-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP1]], i32 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: store i32 [[TMP3]], ptr @PAYLOAD, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP4]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], [8 x i32] poison, [1 x i32] [[TMP8]]), !continuation.registercount [[META10]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvmraytracing/test/dx/lower-await.ll b/llvmraytracing/test/dx/lower-await.ll index 0eb1a4d0d1..04e054bf5c 100644 --- a/llvmraytracing/test/dx/lower-await.ll +++ b/llvmraytracing/test/dx/lower-await.ll @@ -39,10 +39,9 @@ define void @simple_await() !continuation.registercount !1 { ; CLEANED-LABEL: define void @simple_await( ; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.stacksize [[META3:![0-9]+]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: 
[[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @simple_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; @@ -104,10 +103,9 @@ define void @await_with_arg(i32 %i) !continuation.registercount !1 { ; CLEANED-LABEL: define void @await_with_arg( ; CLEANED-SAME: i64 [[RETURNADDR:%.*]], i32 [[I:%.*]]) !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANED-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun_with_arg to i64), i64 ptrtoint (ptr @await_with_arg.resume.0 to i64), i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; @@ -140,10 +138,9 @@ define i32 @await_with_ret_value() !continuation.registercount !1 { ; CLEANED-LABEL: define void @await_with_ret_value( ; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META7:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANED-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 ptrtoint (ptr @await_with_ret_value.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; @@ -175,10 +172,9 @@ define void @wait_await() !continuation.registercount !1 { ; CLEANED-LABEL: define void @wait_await( ; CLEANED-SAME: i64 [[RETURNADDR:%.*]]) !continuation.registercount [[META1]] !continuation [[META8:![0-9]+]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { ; CLEANED-NEXT: AllocaSpillBB: -; CLEANED-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CLEANED-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANED-NEXT: [[CONT_STATE_STACK_ALLOC:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANED-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 ptrtoint (ptr @async_fun_with_waitmask to i64), i64 -1, i64 ptrtoint (ptr @wait_await.resume.0 to i64)), !continuation.registercount [[META1]], !continuation.returnedRegistercount !1 ; CLEANED-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll index 93b6619357..1f0a0edfe6 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll @@ -3,9 +3,9 @@ ; RUN: count 0 < %t0.stderr ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s ; RUN: count 0 < %t2.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s ; RUN: count 0 < %t3.stderr target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -14,6 +14,9 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.DispatchSystemData = type { i32 } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.SystemData = type { %struct.DispatchSystemData } +%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float } +%struct.HitData = type { float, i32 } +%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } %struct.TheirParams = type { i32 } %"class.RWTexture2D >" = type { <4 x float> } @@ -25,6 +28,9 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !1 ret i32 5 } +; Need _cont_ReportHit to get system data type +declare !types !22 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + declare %struct.DispatchSystemData @_cont_SetupRayGen() declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) @@ -80,30 +86,32 @@ attributes #0 = { nounwind } !18 = !{!"function", !"void", !14, i32 poison} !19 = !{!"function", !"void", i32 poison, i32 poison, !20} !20 = !{i32 0, %struct.TheirParams poison} +!21 = !{i32 0, %struct.AnyHitTraversalData poison} +!22 = !{!"function", i1 poison, !21, float poison, i32 poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr 
[[DATA:%.*]]) { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META17:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META16:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META15:![0-9]+]], !continuation.returnedRegistercount !15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP4]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] 
[[DIS_DATA_I]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP6]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META14:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META13:![0-9]+]] ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( @@ -112,21 +120,26 @@ attributes #0 = { nounwind } ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !lgc.cps [[META9]] !continuation [[META17:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META8]] !continuation [[META16:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await__s_struct.DispatchSystemDatas(i32 2, i32 2, i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa19i32a1i32s(i32 2, i32 2, i32 5, [20 x i32] poison, [1 x i32] [[TMP8]]), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP4]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void ; @@ -139,20 +152,22 @@ attributes #0 = { nounwind } ; REGISTERBUFFER-CPS-LABEL: define void @main( ; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META8]] !continuation [[META16:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: 
AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @main.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @main.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 2, {} poison, i64 [[TMP1]], i32 5, [20 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define dso_local void @main.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [19 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { ; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; REGISTERBUFFER-CPS-NEXT: ret void ; @@ -167,23 +182,26 @@ attributes #0 = { nounwind } ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; 
POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @main.resume.0) -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP3]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP3]], i64 [[TMP4]], i32 5, [20 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @main.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [19 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META8]] !continuation [[META16]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-CPS-NEXT: ret void ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll index 2635987b31..4dd21e2073 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll @@ -181,19 +181,19 
@@ declare !types !46 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSys ; Function Attrs: nounwind define void @RayGen() #3 { ; LOWERRAYTRACINGPIPELINE-LABEL: define void @RayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META19]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META26:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META25:![0-9]+]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @RayGen( -; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META27:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META28:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] !continuation.state [[META18]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = 
call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-NEXT: ret void @@ -206,7 +206,7 @@ define void @RayGen() #3 { ; Function Attrs: nounwind define void @Intersection() #3 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @Intersection( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META30:![0-9]+]] !continuation [[META31:![0-9]+]] !continuation.registercount [[META26]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META30:![0-9]+]] !continuation [[META31:![0-9]+]] !continuation.registercount [[META25]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 @@ -231,21 +231,21 @@ define void @Intersection() #3 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]]), 
!continuation.registercount [[META26]], !continuation.returnedRegistercount !26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP13]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount !25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT_STRUCT_ANYHITTRAVERSALDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP13]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP14]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP16:%.*]], label [[TMP18:%.*]] ; LOWERRAYTRACINGPIPELINE: 16: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], !continuation.registercount [[META26]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], !continuation.registercount [[META25]] ; LOWERRAYTRACINGPIPELINE: 18: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP19]], !continuation.registercount [[META26]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP19]], !continuation.registercount [[META25]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @Intersection( -; DXILCONTPOSTPROCESS-SAME: i32 
[[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META28:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] !continuation.stacksize [[META30:![0-9]+]] !continuation.state [[META30]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META29:![0-9]+]] !continuation [[META30:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] !continuation.stacksize [[META31:![0-9]+]] !continuation.state [[META31]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 @@ -253,8 +253,9 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 @@ -294,11 +295,8 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Intersection.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP13]], i64 [[TMP14]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount !25 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Intersection.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount !25 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) @@ -313,7 +311,7 @@ define void @Intersection() #3 { ; Function Attrs: nounwind define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !47 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @AnyHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] !continuation.registercount [[META34:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] !continuation.registercount [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -326,27 +324,24 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 
0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load float, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: 
call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP18]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr 
[[TMP23]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 @@ -378,35 +373,32 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP37]], i32 [[RES_I5]], 3 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] [[TMP38]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP40]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr addrspace(20) @PAYLOAD, align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP42]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP44]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP58]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP47]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP62]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP50]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr [[TMP51]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP54]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP53]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP55]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP55]], !continuation.registercount [[META26]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @AnyHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META31:![0-9]+]] !continuation [[META32:![0-9]+]] !continuation.registercount [[META33:![0-9]+]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] !continuation.registercount [[META26:![0-9]+]] !continuation.state [[META18]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 @@ -440,12 +432,14 @@ define void @AnyHit(%struct.RayPayload* noalias 
nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP8]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP8]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP9]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_019_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT18]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_019_0_VEC_EXTRACT]] to i32 @@ -504,10 +498,12 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 2 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 3 ; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT8]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_1_EXTRACT]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_2_EXTRACT]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), 
i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_3_EXTRACT]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTFCA_0_EXTRACT8]] to i32 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = bitcast float [[DOTFCA_1_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP34]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_2_EXTRACT]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_3_EXTRACT]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float @@ -547,7 +543,7 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP17]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP30]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP30]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META26]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) @@ -565,7 +561,7 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; Function Attrs: nounwind define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !47 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] !continuation.registercount [[META34]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META26]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -576,62 +572,58 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. 
; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load float, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP9]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP12]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call float @_cont_RayTMin(ptr [[TMP23]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP26]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load float, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP28]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP27]], ptr [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = 
call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP30]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I1:%.*]] = load i32, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP32]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) @PAYLOAD, align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP34]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP50:%.*]] = load i32, ptr [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP50]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP40]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP40]], !continuation.registercount [[META26]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META33]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META26]] !continuation.state [[META18]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca 
[[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 @@ -644,11 +636,11 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT]], ptr [[DOTFCA_1_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), 
align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 @@ -658,11 +650,11 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call float @_cont_RayTMin(ptr [[TMP10]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT6:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = 
call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP15]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP16:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT15]], ptr [[DOTFCA_0_GEP16]], align 4 @@ -670,21 +662,21 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP18:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT17]], ptr [[DOTFCA_1_GEP18]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP14]], ptr [[TMP1]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP17]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT12:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP17]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I1:%.*]] = load i32, ptr [[RESPTR_I]], align 4 -; DXILCONTPOSTPROCESS-NEXT: store float [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store float [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; 
DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP19]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP19]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META26]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll index 116424b268..a558cdf01b 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll @@ -97,7 +97,7 @@ declare !types !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %s ; Function Attrs: nounwind define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersectionAttributes* %1) #3 !types !37 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -116,26 +116,23 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 0 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load float, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP79]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP23]], ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP24]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void 
@amd.dx.setLocalRootIndex(i32 5) @@ -152,35 +149,35 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call float @_cont_RayTMin(ptr [[TMP37]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP40]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call float @_cont_RayTCurrent(ptr [[TMP39]], ptr [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP42]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP45]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP44]], ptr [[TMP4]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = 
call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP48]], ptr [[TMP5]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP47]], ptr [[TMP5]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP51]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP50]], ptr [[TMP6]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP54]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP53]], ptr [[TMP9]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP55]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call 
[[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP57]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP56]], ptr [[TMP10]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP58]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP60]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = call [4 x <3 x float>] @_cont_ObjectToWorld4x3(ptr [[TMP59]], ptr [[TMP2]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] [[TMP61]], ptr [[TMP13]], align 4 @@ -188,35 +185,34 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP_LOAD2:%.*]] = load <3 x float>, ptr [[COL_GEP1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[M:%.*]] = extractelement <3 x float> [[COL_GEP_LOAD2]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP63]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = call [4 x <3 x float>] @_cont_WorldToObject4x3(ptr [[TMP62]], ptr [[TMP3]]) ; 
LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] [[TMP64]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP:%.*]] = getelementptr [4 x <3 x float>], ptr [[TMP12]], i32 0, i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP_LOAD:%.*]] = load <3 x float>, ptr [[COL_GEP]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[N:%.*]] = extractelement <3 x float> [[COL_GEP_LOAD]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP65]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP7]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load float, ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP68]], ptr @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP67]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr addrspace(20) @PAYLOAD, align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load float, ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP70]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP82]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @PAYLOAD, i32 7), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP86]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP73]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP90]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP76]], !continuation.registercount [[META25]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP76]], !continuation.registercount [[META20]] ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META21:![0-9]+]] 
!continuation [[META22:![0-9]+]] !continuation.registercount [[META23:![0-9]+]] !continuation.state [[META18:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] !continuation.state [[META18:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -236,11 +232,11 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load float, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load float, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP15]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_016_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT14]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_016_0_VEC_EXTRACT]] to i32 @@ -262,7 +258,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = call float @_cont_RayTMin(ptr [[TMP26]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = call [[STRUCT_HITDATA]] 
@_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT24:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP29]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP25:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP7]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT24]], ptr [[DOTFCA_0_GEP25]], align 4 @@ -273,7 +269,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP31]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT40:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP34]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP41:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT40]], ptr [[DOTFCA_0_GEP41]], align 4 @@ -282,7 +278,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT42]], ptr [[DOTFCA_1_GEP43]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP33]], ptr [[TMP3]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 
0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT36:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP37]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP37:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT36]], ptr [[DOTFCA_0_GEP37]], align 4 @@ -291,7 +287,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT38]], ptr [[DOTFCA_1_GEP39]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP36]], ptr [[TMP4]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP40]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP33:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT32]], ptr [[DOTFCA_0_GEP33]], align 4 @@ -300,7 +296,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT34]], ptr [[DOTFCA_1_GEP35]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP39]], ptr [[TMP5]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP43:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP43:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP43]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP21:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP8]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT20]], ptr [[DOTFCA_0_GEP21]], align 4 @@ -310,7 +306,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[TMP44:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP42]], ptr [[TMP8]]) ; DXILCONTPOSTPROCESS-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP44]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP46]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP18:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP9]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT17]], ptr [[DOTFCA_0_GEP18]], align 4 @@ -320,7 +316,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[TMP47:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP45]], ptr [[TMP9]]) ; DXILCONTPOSTPROCESS-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP47]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP49:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP49:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT48:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP49]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP49:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT48]], ptr [[DOTFCA_0_GEP49]], align 4 @@ -334,7 +330,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP50]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[M:%.*]] = extractelement <3 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP52:%.*]] = call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP52:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT44:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP52]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP45:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT44]], ptr [[DOTFCA_0_GEP45]], align 4 @@ -347,7 +343,7 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT8:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 2 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT9:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[N:%.*]] = extractelement <3 x float> [[DOTFCA_0_EXTRACT5]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP54:%.*]] 
= call [[STRUCT_HITDATA]] @_cont_GetCommittedState(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP54:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT28:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP54]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP29:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT28]], ptr [[DOTFCA_0_GEP29]], align 4 @@ -355,16 +351,16 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP31:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT30]], ptr [[DOTFCA_1_GEP31]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP55:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP6]]) -; DXILCONTPOSTPROCESS-NEXT: store float [[TMP11]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store float [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 8) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) @REGISTERS, align 4 +; 
DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP56]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP57]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META23]] +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP57]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META20]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %a = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll index 1d354802ae..b027b2b936 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll @@ -7,7 +7,7 @@ ; RUN: count 0 < %t0.stderr ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' -S 2> %t1.stderr | FileCheck -check-prefix=CLEANUP %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=CLEANUP-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=CLEANUP-CPS %s ; RUN: count 0 < %t2.stderr ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t3.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t3.stderr @@ -30,6 +30,9 @@ 
target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 +; Need _cont_ReportHit to get system data type +declare !types !206 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 declare !types !200 void @dx.op.traceRay.struct.SmallPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.SmallPayload*) @@ -156,8 +159,10 @@ attributes #3 = { nounwind memory(none) } !202 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !102} !203 = !{!"function", !"void", !103, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} !204 = !{!"function", !"void", !100} +!205 = !{i32 0, %struct.AnyHitTraversalData poison} +!206 = !{!"function", i1 poison, !205, float poison, i32 poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15:![0-9]+]] !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] 
!continuation.stacksize [[META19:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[P1:%.*]] = alloca [[STRUCT_SMALLPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[P2:%.*]] = alloca [[STRUCT_MEDIUMPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[P3:%.*]] = alloca [[STRUCT_LARGEPAYLOAD:%.*]], align 8 @@ -167,130 +172,132 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP5]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] zeroinitializer, ptr [[P1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] zeroinitializer, ptr [[P2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] zeroinitializer, ptr [[P3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], 
[[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [1 x i32], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP9]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call ptr inttoptr 
(i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP10]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP11]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] poison, ptr [[P1]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP11]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT10:%.*]] ; 
LOWERRAYTRACINGPIPELINE: .split10: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP18]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP25]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP24:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr addrspace(32) [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA]](ptr [[TMP36]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] poison, ptr [[P2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP28]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP28]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP38]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(32) [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP50]], ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load ptr addrspace(32), ptr 
addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP37]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT9:%.*]] ; LOWERRAYTRACINGPIPELINE: .split9: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I5:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I5:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP53]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP39]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP39]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP39]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP39]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP39]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 
0, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP50]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP60]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP56]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP61]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP55]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP61]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP65]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP55]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP61]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], 
ptr addrspace(32) [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP55]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP61]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA]](ptr [[TMP74]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] poison, ptr [[P3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP52]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP53]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP54]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[TMP53]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[TMP53]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr getelementptr 
([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP58]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP53]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP60]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[TMP53]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP51]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[TMP77]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[TMP82]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP76]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = load i32, ptr 
addrspace(32) [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP88]], ptr [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr [[TMP82]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP76]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(32) [[TMP90]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr [[TMP89]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr [[TMP82]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP76]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(32) [[TMP93]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP94]], ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP75]], ptr [[TMP53]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load [1 x i32], ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP65]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP67]], !continuation.registercount [[META18]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP96]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP99]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP100]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP101]], !continuation.registercount [[META17]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( @@ -299,117 +306,118 @@ attributes #3 = { nounwind memory(none) } ; ; ; CLEANUP-LABEL: define void @Miss( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] !continuation.state [[META21:![0-9]+]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15:![0-9]+]] 
!lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 -; CLEANUP-NEXT: store i32 [[DOTFCA_0_EXTRACT]], ptr addrspace(32) [[DOTFCA_0_EXTRACT_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; CLEANUP-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; CLEANUP-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; CLEANUP-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP2:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; CLEANUP-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; CLEANUP-NEXT: 
[[DOTFCA_0_INSERT30:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT30]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.0( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META18]] !continuation [[META19]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 28) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr 
addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) -; CLEANUP-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; CLEANUP-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; CLEANUP-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; CLEANUP-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP2:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; CLEANUP-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; CLEANUP-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 
5 -; CLEANUP-NEXT: [[TMP4:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.1 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount !14 +; CLEANUP-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP4]], align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP4]], i32 1 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP7]], align 4 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.1 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.1( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] !continuation [[META19]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.1: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 28) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(32) [[TMP6]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) -; CLEANUP-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) -; CLEANUP-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 +; CLEANUP-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; CLEANUP-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; CLEANUP-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT12]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, 
[[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 ; CLEANUP-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; CLEANUP-NEXT: [[TMP6:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; CLEANUP-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.2 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; CLEANUP-NEXT: [[TMP10:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP11]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP11]], i32 1 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP14]], align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP11]], i32 2 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP15]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP11]], i32 3 +; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP16]], align 4 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i64 ptrtoint (ptr @Miss.resume.2 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.2( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] !continuation [[META19]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.2: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr 
addrspace(32) @lgc.cps.peek(i32 28) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(32) [[TMP6]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 2 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(32) [[TMP8]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 3 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP10]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTFCA_0_EXTRACT_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) 
[[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT_RELOAD]], 0 -; CLEANUP-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT12:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT17]], 0 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT12]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT14]], 0 +; CLEANUP-NEXT: call void @lgc.cps.free(i32 28) +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; @@ -419,108 +427,129 @@ attributes #3 = { nounwind memory(none) } ; ; ; CLEANUP-CPS-LABEL: define void @Miss( -; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.attribute.size [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.stacksize [[META20:![0-9]+]] { +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [27 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.attribute.size [[META15:![0-9]+]] !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] { ; CLEANUP-CPS-NEXT: AllocaSpillBB: -; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) ; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) 
[[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; CLEANUP-CPS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-CPS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; CLEANUP-CPS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) -; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; CLEANUP-CPS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 ; CLEANUP-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 
0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT28:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; CLEANUP-CPS-NEXT: store [1 x i32] [[DOTFCA_0_INSERT28]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.0) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP2]], i32 5), !continuation.returnedRegistercount !18, !continuation.registercount [[META18]] +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT13:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i64 [[TMP1]], i32 5, [30 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT13]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; CLEANUP-CPS-NEXT: unreachable ; ; ; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.0( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] !continuation.stacksize [[META19]] { ; CLEANUP-CPS-NEXT: entryresume.0: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) ; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; 
CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load [1 x i32], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT30:%.*]] = extractvalue [1 x i32] [[TMP5]], 0 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT45:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-CPS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) -; CLEANUP-CPS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; CLEANUP-CPS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; CLEANUP-CPS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; CLEANUP-CPS-NEXT: 
[[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT45]], 0 ; CLEANUP-CPS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-CPS-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.1) -; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14:![0-9]+]] +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP9]], align 4 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP9]], i32 1 +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP12]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT17:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP8]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT17]], i32 0, 1 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.1) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i64 [[TMP13]], i32 5, [30 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 ; CLEANUP-CPS-NEXT: unreachable ; ; ; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.1( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP3:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] !continuation.stacksize [[META19]] { ; CLEANUP-CPS-NEXT: entryresume.1: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) ; CLEANUP-CPS-NEXT: 
[[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT18:%.*]] = extractvalue [2 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT18]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP7]], align 4 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP11]], align 4 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT18]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT47:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-CPS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] 
@dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) -; CLEANUP-CPS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) -; CLEANUP-CPS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; CLEANUP-CPS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; CLEANUP-CPS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT47]], 0 ; CLEANUP-CPS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 ; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-CPS-NEXT: store i32 [[TMP9]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; CLEANUP-CPS-NEXT: store i32 0, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @Miss.resume.2) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP10]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP15]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP16]], align 4 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP16]], i32 1 +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP19]], align 4 +; CLEANUP-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP16]], i32 2 +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP20]], align 4 +; CLEANUP-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP16]], i32 3 +; CLEANUP-CPS-NEXT: store i32 0, ptr addrspace(32) [[TMP21]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT23:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP15]], 0 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT26:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT23]], i32 0, 1 +; CLEANUP-CPS-NEXT: [[TMP22:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.2) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i64 [[TMP22]], i32 5, [30 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT26]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 ; CLEANUP-CPS-NEXT: unreachable ; ; ; CLEANUP-CPS-LABEL: define dso_local void @Miss.resume.2( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.attribute.size [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] !continuation [[META19]] !continuation.stacksize [[META20]] { +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP3:%.*]]) !lgc.rt.attribute.size [[META15]] !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] !continuation.stacksize [[META19]] { ; CLEANUP-CPS-NEXT: entryresume.2: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 20) +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) ; CLEANUP-CPS-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 4 -; CLEANUP-CPS-NEXT: 
[[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 4), align 4 -; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 5), align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT27:%.*]] = extractvalue [2 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT29:%.*]] = extractvalue [2 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT27]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP7]], align 4 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP11]], align 4 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP13]], align 4 +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(32) [[TMP15]], align 4 +; CLEANUP-CPS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT27]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT49:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: 
[[PAYLOAD_FCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR]], align 4 ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 1 ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; CLEANUP-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 -; CLEANUP-CPS-NEXT: store [1 x i32] [[DOTFCA_0_INSERT]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT11:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 -; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 20) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT11]]) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT44:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT49]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT_RELOAD]], 0 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 24) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT44]], [27 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-CPS-NEXT: unreachable ; ; @@ -535,37 +564,31 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 16 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP6]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP2]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], 24 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP7]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP8]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_EXTRACT]], ptr addrspace(21) [[TMP9]], align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr 
addrspace(21) [[TMP9]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T1]]) -; DXILCONTPOSTPROCESS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) +; DXILCONTPOSTPROCESS-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT30_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT30]], 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT30_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, 
align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP13]], i64 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; @@ -576,42 +599,30 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[TMP5]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT13:%.*]] = extractvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T110]]) -; DXILCONTPOSTPROCESS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT13]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T110]]) +; DXILCONTPOSTPROCESS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I4:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP9]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP11]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.1 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, 
...) @continuation.continue(i64 4, i32 [[TMP19]], i64 [[TMP20]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP7]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP5]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.1 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I4]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount !13 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; @@ -622,64 +633,47 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_MEDIUMPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP3]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[T17]]) -; DXILCONTPOSTPROCESS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT15]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T17]]) +; DXILCONTPOSTPROCESS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, 
[[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT12]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I8:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 -1, 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP22]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP23]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 12 -; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP27]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP28]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], 16 -; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP31]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP33]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 20 -; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP38]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP40]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.2 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP41]], i64 [[TMP42]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP16]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP14]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP18]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP19]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP14]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP22]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = add i32 [[TMP14]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP23]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP24]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP25]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @Miss.resume.2 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call 
void (i64, ...) @continuation.continue(i64 4, i32 [[TMP26]], i64 [[TMP27]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I8]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount !13 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; @@ -690,50 +684,41 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -28 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_LARGEPAYLOAD_ATTR_MAX_2_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP3]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 
0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 12 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 16 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(21) [[TMP22]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 20 -; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP3]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP13]], i32 0 +; 
DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = add i32 [[TMP3]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP16]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP17]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(21) [[TMP18]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP4]], 24 -; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP4]], 16 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP34]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP35]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT_RELOAD]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[DOTFCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_0_INSERT_FCA_0_EXTRACT]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT12:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT17]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr [[CSP]], align 4 -; 
DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP36]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT12]]), !continuation.registercount [[META17]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = add i32 [[TMP2]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP22]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP23]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = add i32 [[TMP2]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP24]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP26]], align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT14]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], -28 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP28]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP29]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll index 4e770e4ae2..69840cc200 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll @@ -12,13 +12,13 @@ ; RUN: count 0 < %t3.stderr ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t4.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s ; RUN: count 0 < %t4.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata' \ ; RUN: -S %s 2> %t5.stderr | FileCheck -check-prefix=CLEANUP-CPS %s ; RUN: count 0 < %t5.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ ; RUN: -S %s 2> %t6.stderr | FileCheck 
-check-prefix=REGISTERBUFFER-CPS %s ; RUN: count 0 < %t6.stderr -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ ; RUN: -S %s 2> %t7.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s ; RUN: count 0 < %t7.stderr @@ -30,6 +30,8 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.SystemData = type { %struct.DispatchSystemData } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.MyParams = type { i32 } +%struct.HitData = type { <3 x float>, <3 x float>, float, i32 } +%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } %"class.RWTexture2D >" = type { <4 x float> } @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 @@ -50,6 +52,9 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !1 ret i32 5 } +; Need _cont_ReportHit to get system data type +declare !types !22 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + ; Function Attrs: nounwind memory(none) declare !types !22 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #1 @@ -112,6 +117,8 @@ attributes #1 = { alwaysinline } !20 = !{i32 0, %struct.MyParams poison} !21 = !{!"function", !"void", i32 poison, i32 poison, !20} !22 = !{!"function", <3 x i32> poison, !16} +!23 = !{i32 0, 
%struct.AnyHitTraversalData poison} +!24 = !{!"function", i1 poison, !23, float poison, i32 poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { @@ -119,39 +126,37 @@ attributes #1 = { alwaysinline } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META15]], !continuation.returnedRegistercount !15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP7]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr @PAYLOAD, 
align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP12]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], !continuation.registercount [[META15]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP17]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP18]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP23]], !continuation.registercount [[META14]] ; ; ; CLEANUP-LABEL: define i32 @_cont_GetLocalRootIndex( @@ -160,40 +165,37 @@ attributes #1 = { alwaysinline } ; ; ; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META19]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) 
!lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: store i32 [[TMP2]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META15]], !continuation.returnedRegistercount !15 +; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @called.resume.0( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META15]] !continuation [[META18]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 8) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; CLEANUP-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 -; CLEANUP-NEXT: [[TMP4:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; CLEANUP-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP4]], i8 0 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP2]], ptr @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; CLEANUP-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP2]], i8 0 +; CLEANUP-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; CLEANUP-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 +; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META15]] +; CLEANUP-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; CLEANUP-NEXT: unreachable ; ; @@ -205,16 +207,14 @@ attributes #1 = { alwaysinline } ; REGISTERBUFFER-LABEL: define void @called( ; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { ; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: store i64 
[[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 ; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 ptrtoint (ptr @called.resume.0 to i64), [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; REGISTERBUFFER-NEXT: unreachable ; @@ -222,20 +222,19 @@ attributes #1 = { alwaysinline } ; REGISTERBUFFER-LABEL: define dso_local void @called.resume.0( ; REGISTERBUFFER-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { ; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 ; REGISTERBUFFER-NEXT: 
[[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 +; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; REGISTERBUFFER-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 -; REGISTERBUFFER-NEXT: [[TMP4:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; REGISTERBUFFER-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP4]], i8 0 -; REGISTERBUFFER-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; REGISTERBUFFER-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP2]], i8 0 +; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; REGISTERBUFFER-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 +; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 ; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 8) ; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; REGISTERBUFFER-NEXT: unreachable ; @@ -251,8 +250,9 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 @@ -261,11 +261,8 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 8 -; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP8]], i64 [[TMP9]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; POSTPROCESS-NEXT: unreachable ; ; @@ -278,21 +275,21 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 -; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; POSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> 
[[TMP9]], i8 0 -; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 +; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; POSTPROCESS-NEXT: unreachable @@ -304,34 +301,43 @@ attributes #1 = { alwaysinline } ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @called( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void 
@amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await__s_struct.DispatchSystemDatas(i32 2, i32 2, i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP5]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa8i32a1i32s(i32 2, i32 2, i32 5, [9 x i32] poison, [1 x i32] [[TMP14]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP15]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP16]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP15]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP17]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: .split: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP12]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP24]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP25]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP32]], [8 x i32] poison, [1 x i32] [[TMP33]]), !continuation.registercount [[META14]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; @@ -341,40 +347,44 @@ attributes #1 = { alwaysinline } ; ; ; CLEANUP-CPS-LABEL: define void @called( -; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META15:![0-9]+]] !continuation [[META18:![0-9]+]] { +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] { ; CLEANUP-CPS-NEXT: AllocaSpillBB: ; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-CPS-NEXT: store i32 undef, ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @called.resume.0) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !15, !continuation.registercount [[META15]] +; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i64 [[TMP0]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; CLEANUP-CPS-NEXT: unreachable ; ; ; CLEANUP-CPS-LABEL: define dso_local void @called.resume.0( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META17]] !lgc.cps [[META15]] !continuation [[META18]] { +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META16]] !lgc.cps [[META14]] !continuation [[META17]] { ; CLEANUP-CPS-NEXT: entryresume.0: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; CLEANUP-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 +; CLEANUP-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 ; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP6]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) -; CLEANUP-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 -; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) -; CLEANUP-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; CLEANUP-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-CPS-NEXT: store i32 [[TMP6]], ptr @PAYLOAD, align 4 -; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP8]]) +; CLEANUP-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP10]]) +; CLEANUP-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; CLEANUP-CPS-NEXT: unreachable ; ; @@ -384,40 +394,44 @@ attributes #1 = { alwaysinline } ; ; ; REGISTERBUFFER-CPS-LABEL: define void @called( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: ; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: store i32 undef, ptr 
addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP1]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i64 [[TMP0]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define dso_local void @called.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { ; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 +; REGISTERBUFFER-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 ; REGISTERBUFFER-CPS-NEXT: 
[[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP6]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) -; REGISTERBUFFER-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) -; REGISTERBUFFER-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP6]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP8]]) +; REGISTERBUFFER-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP10]]) +; REGISTERBUFFER-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; @@ -427,49 +441,57 @@ attributes #1 = { alwaysinline } ; ; ; POSTPROCESS-CPS-LABEL: define void @called( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META14:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 -; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP0]] to ptr 
addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @called.resume.0) -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 2, {} poison, i32 [[TMP5]], i32 5), !continuation.returnedRegistercount !14, !continuation.registercount [[META14]] +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP4]], i64 [[TMP5]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @called.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META14]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], -8 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP7]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } 
[[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP8]], i32 0 -; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP9]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP4]]) -; POSTPROCESS-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP4]]) -; POSTPROCESS-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP7]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 2, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP11]]) +; POSTPROCESS-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP13]]) +; POSTPROCESS-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP14]], i8 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP17]], i32 [[TMP18]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; POSTPROCESS-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll index c03281c76b..15b1889fbb 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll @@ -104,48 +104,47 @@ attributes #3 = { nounwind memory(none) } !29 = !{i32 0, %struct.AnyHitTraversalData poison} !30 = !{!"function", i32 poison, !27} ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META20:![0-9]+]] !continuation.registercount [[META21:![0-9]+]] !continuation [[META22:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META20:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META21:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, 
ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store i16 17, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i16 [[TMP19]], ptr getelementptr inbounds ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr getelementptr inbounds ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load [[TMP0]], ptr [[TMP24]], align 1 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP25]], ptr getelementptr ([[STRUCT_PAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 3), align 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]], !continuation.registercount [[META21]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP18]], align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i8 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i8, 
ptr [[TMP18]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i8 [[TMP23]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), i32 1), align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP24]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i8 [[TMP29]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), i32 4), align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP24]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i8 [[TMP31]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), i32 5), align 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP33]], !continuation.registercount [[META18]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( diff --git a/llvmraytracing/test/dx/lower-rt-pipeline.ll b/llvmraytracing/test/dx/lower-rt-pipeline.ll index cd4f278f45..dc2d8e0a00 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline.ll +++ 
b/llvmraytracing/test/dx/lower-rt-pipeline.ll @@ -3,11 +3,11 @@ ; RUN: count 0 < %t0.stderr ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s ; RUN: count 0 < %t1.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' -S %s 2> %t2.stderr | FileCheck -check-prefix=REGISTERBUFFER-CPS %s ; RUN: count 0 < %t2.stderr -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t3.stderr | FileCheck -check-prefix=POSTPROCESS %s ; RUN: count 0 < %t3.stderr -; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata,sroa' -S %s 2> %t4.stderr | FileCheck -check-prefix=SROA-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t4.stderr | FileCheck -check-prefix=POSTPROCESS-CPS %s ; RUN: count 0 < %t4.stderr target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -500,7 +500,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -510,9 +510,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn 
memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA36:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 @@ -520,60 +520,54 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = 
getelementptr i32, ptr [[TMP14]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], !continuation.returnedRegistercount !34 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP21]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !33 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP21]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA36]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META33:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META32:![0-9]+]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META41:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META40:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -581,29 +575,23 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, 
ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], 
ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
store i32 [[TMP22]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -619,29 +607,25 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[TMP31]], float 1.000000e+00, i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP34]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP38]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP38]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP38]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP46]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP46]], !continuation.registercount [[META33]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage 
[[META42:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META42:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -658,31 +642,25 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP14]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP18]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP25]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP25]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP26]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -724,177 +702,147 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], 1.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = fcmp fast ogt float [[TMP39]], -1.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP40]], label [[TMP43:%.*]], label [[TMP88:%.*]] -; LOWERRAYTRACINGPIPELINE: 43: +; LOWERRAYTRACINGPIPELINE: 38: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP66:%.*]] -; LOWERRAYTRACINGPIPELINE: 44: +; LOWERRAYTRACINGPIPELINE: 39: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP45]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[TMP46]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP47]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[TMP46]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP50]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP50]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr i32, ptr [[TMP50]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP46]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP50]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr 
inbounds i32, ptr [[TMP50]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP47]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[TMP50]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP57]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP58]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP53]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP62]], ptr [[TMP61]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP63:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP63]], ptr [[TMP64]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP65]], !continuation.registercount [[META34]] -; LOWERRAYTRACINGPIPELINE: 66: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP65]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 56: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP67]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP68]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP69]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP68]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr [[TMP72]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr i32, ptr [[TMP72]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP76]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr i32, ptr [[TMP72]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP68]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP72]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr 
inbounds i32, ptr [[TMP72]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP76]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[TMP72]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP79]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr [[TMP80]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP73]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr [[TMP83]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP85:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP85]], ptr [[TMP86]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP87]], !continuation.registercount [[META34]] -; LOWERRAYTRACINGPIPELINE: 88: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP87]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 73: ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP42]], label [[TMP89:%.*]], label [[TMP134:%.*]] -; LOWERRAYTRACINGPIPELINE: 89: +; LOWERRAYTRACINGPIPELINE: 74: ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP41]], label [[TMP90:%.*]], label [[TMP112:%.*]] -; LOWERRAYTRACINGPIPELINE: 90: +; LOWERRAYTRACINGPIPELINE: 75: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP91]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr i32, ptr [[TMP92]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr i32, ptr [[TMP93]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP95]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr i32, ptr [[TMP92]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = getelementptr i32, ptr [[TMP96]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP97]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = getelementptr i32, ptr [[TMP96]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load i32, ptr [[TMP99]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP100]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr i32, ptr [[TMP96]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP95]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr [[TMP92]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP96]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr inbounds i32, ptr [[TMP96]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load i32, ptr [[TMP81]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP100]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr [[TMP96]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = load i32, ptr [[TMP101]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP102]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP103]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP105]], ptr [[TMP104]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP106]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP102]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP93]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP94]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP108]], ptr [[TMP107]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = load 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP109]], ptr [[TMP110]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP111]], !continuation.registercount [[META34]] -; LOWERRAYTRACINGPIPELINE: 112: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP111]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 92: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP113]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = getelementptr i32, ptr [[TMP114]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = getelementptr i32, ptr [[TMP115]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = load i32, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP117]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[TMP114]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr i32, ptr [[TMP118]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP120]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = getelementptr i32, ptr [[TMP118]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP121]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP122]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr i32, ptr [[TMP118]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = load i32, ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP117]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP118]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP120]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr 
[[TMP118]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP99]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP122]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr inbounds i32, ptr [[TMP118]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP123]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP124]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP127]], ptr [[TMP126]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = load i32, ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP124]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP104]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = load i32, ptr [[TMP103]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP130]], ptr [[TMP129]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = load 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP131]], ptr [[TMP132]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP133]], !continuation.registercount [[META34]] -; LOWERRAYTRACINGPIPELINE: 134: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP133]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 109: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = getelementptr i32, ptr [[TMP135]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = getelementptr i32, ptr [[TMP136]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = load i32, ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP138]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr i32, ptr [[TMP135]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr i32, ptr [[TMP139]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = load i32, ptr [[TMP140]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP141]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = getelementptr i32, ptr [[TMP139]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = load i32, ptr [[TMP142]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP143]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr i32, ptr [[TMP139]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = load i32, ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP138]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = load i32, ptr [[TMP139]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP141]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = getelementptr inbounds i32, ptr [[TMP139]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = load i32, ptr [[TMP115]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP143]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr inbounds i32, ptr [[TMP139]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP145]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP148:%.*]] = load i32, ptr [[TMP146]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP148]], ptr [[TMP147]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = load i32, ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP145]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP121]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = load i32, ptr [[TMP119]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP151]], ptr [[TMP150]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP152:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP152]], ptr [[TMP153]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP154:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP154]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP154]], !continuation.registercount [[META33]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] 
!lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43:![0-9]+]] !continuation.registercount [[META32]] !continuation [[META44:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 @@ -916,19 +864,17 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: callAHit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT_STRUCT_ANYHITTRAVERSALDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: isEnd.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 @@ -939,17 +885,17 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label 
[[TMP23:%.*]] -; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE: 19: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 23: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE: 21: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META32]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader2( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META33]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43]] !continuation.registercount [[META32]] !continuation [[META45:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 @@ -971,19 +917,17 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: callAHit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] @await.struct.AnyHitTraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT_STRUCT_ANYHITTRAVERSALDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: isEnd.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 @@ -994,59 +938,51 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label [[TMP23:%.*]] -; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE: 19: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 23: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE: 21: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], !continuation.registercount [[META32]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META47:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP20]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP20]], i64 2 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META33]] ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( @@ -1129,8 +1065,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !lgc.cps [[META23]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca 
[[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -1139,9 +1076,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA36:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] 
= call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 @@ -1149,489 +1086,547 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] (...) @lgc.cps.await__s_struct.DispatchSystemDatas(i32 4, i32 4, i32 5), !continuation.returnedRegistercount !34, !continuation.registercount [[META34:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } (...) @lgc.cps.await__sl_s_struct.DispatchSystemDatasa33i32a10i32s(i32 4, i32 4, i32 5, [36 x i32] poison, [10 x i32] [[TMP27]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP28]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP29]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP28]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP26]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP21]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP44]], ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP30]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: .split: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP34]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP35]], i8 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP36]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[TMP33]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP33]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP33]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP33]], i64 3 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP37]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP38]], float [[TMP39]], float [[TMP40]], float [[TMP41]], i8 15) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA36]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = 
extractelement <3 x i32> [[TMP49]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP50]], i8 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP51]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = extractelement <4 x float> [[TMP48]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP48]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = extractelement <4 x float> [[TMP48]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = extractelement <4 x float> [[TMP48]], i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP52]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP53]], float [[TMP54]], float [[TMP55]], float [[TMP56]], i8 15) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} 
[[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] 
[[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = fsub fast float 1.000000e+00, [[TMP13]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP12]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = fsub fast float [[TMP14]], [[TMP15]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = insertelement <4 x float> undef, float [[TMP16]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP13]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP15]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float 1.000000e+00, i64 3 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP20]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP26]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP26]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP33]], align 4 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP34]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load <2 x float>, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[TMP28]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = fsub fast float 1.000000e+00, [[TMP29]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[TMP28]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = fsub fast float [[TMP30]], [[TMP31]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = insertelement <4 x float> undef, float [[TMP32]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP29]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float 1.000000e+00, i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP36]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP39]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP40]], ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP51]], ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP54]], ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP56]], [33 x i32] poison, [10 x i32] [[TMP57]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43:![0-9]+]] !lgc.cps [[META40]] !continuation [[META44:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META42:![0-9]+]] !lgc.cps [[META39]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = alloca 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP13]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP14]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 2 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP28]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP29]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP31]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], ptr [[HITATTRSALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load <4 x float>, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 
0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load <4 x float>, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I1:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I1]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I2:%.*]] = load float, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I1]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I2:%.*]] = load float, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2_I4:%.*]] = load float, ptr [[RESPTR_2_I3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I5:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I5:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0, i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I6:%.*]] = load float, ptr [[RESPTR_3_I5]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[RES_1_I2]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[RES_2_I4]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[RES_3_I6]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = 
extractelement <3 x float> [[VAL_2_I9]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_1_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_1_I:%.*]] = load float, ptr [[RESPTR_1_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_2_I:%.*]] = load float, ptr [[RESPTR_2_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1, i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_3_I:%.*]] = load float, ptr [[RESPTR_3_I]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[RES_1_I]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[RES_2_I]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_2_I:%.*]] = 
insertelement <3 x float> [[VAL_1_I]], float [[RES_3_I]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I10:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I10]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I10]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I11:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = fmul fast float [[RES_I11]], [[EXTRACT]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[EXTRACT1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP27]], 0.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], 1.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = fcmp fast ogt float [[TMP27]], -1.000000e+00 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP28]], label [[TMP31:%.*]], label [[TMP76:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 31: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP29]], label [[TMP32:%.*]], label [[TMP54:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 32: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP33]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP37]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP34]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP38]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP40]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[TMP38]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP38]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP44]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP50]], ptr [[TMP49]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP51]], ptr [[TMP52]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP53]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 54: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP55]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP56]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[TMP57]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP59]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP56]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr [[TMP60]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP62]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP60]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP64]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[TMP60]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP66]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0 -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP69]], ptr [[TMP68]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP72]], ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = fmul fast float [[RES_I11]], [[EXTRACT]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = fadd fast float [[TMP42]], [[EXTRACT1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP43]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP43]], 1.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP43]], -1.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP44]], label [[TMP47:%.*]], label [[TMP106:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 39: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP45]], label [[TMP48:%.*]], label [[TMP77:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 40: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP49]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP51]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP50]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP58]], ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP56]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP61]], ptr [[TMP59]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP56]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP65]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP52]], ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP53]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP54]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP62]], ptr [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], 
ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP73]], ptr [[TMP74]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP75]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP76:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP75]], [8 x i32] poison, [10 x i32] [[TMP76]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 61: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP78]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP79]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP66]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP67]], ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP85]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP86]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP70]], ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr [[TMP85]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[TMP86]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP80]], ptr [[TMP93]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP81]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP84]], ptr [[TMP83]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP102:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP102]], ptr [[TMP103]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP104:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP104]], [8 x i32] poison, [10 x i32] [[TMP105]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 76: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP30]], label [[TMP77:%.*]], label [[TMP122:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 77: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP29]], label [[TMP78:%.*]], label [[TMP100:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 78: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP79]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = getelementptr i32, ptr [[TMP80]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP81]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP83]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[TMP80]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = getelementptr i32, ptr [[TMP84]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP86]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr [[TMP84]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = load i32, ptr 
[[TMP87]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP88]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[TMP84]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP90]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP91]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP93]], ptr [[TMP92]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP96]], ptr [[TMP95]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP97]], ptr [[TMP98]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP99]]) +; LOWERRAYTRACINGPIPELINE-CPS: 82: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP46]], label [[TMP107:%.*]], label [[TMP168:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 83: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP45]], label [[TMP108:%.*]], label [[TMP138:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 84: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP109]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP110]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP87]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP116:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP110]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP117]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP90]], ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr [[TMP116]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP95]], ptr [[TMP91]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP124:%.*]] = getelementptr inbounds i32, ptr [[TMP116]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP125:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load i32, 
ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP96]], ptr [[TMP124]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP98]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP99]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP101]], ptr [[TMP100]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP134:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP134]], ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP136:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP137:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP136]], [8 x i32] poison, [10 x i32] [[TMP137]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 100: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP101]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP103:%.*]] = getelementptr i32, ptr [[TMP102]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP104:%.*]] = getelementptr i32, ptr [[TMP103]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP104]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP105]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP106:%.*]] = getelementptr i32, ptr [[TMP102]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP107:%.*]] = getelementptr i32, ptr [[TMP106]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP108]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP109:%.*]] = getelementptr i32, ptr [[TMP106]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP109]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP110]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP111:%.*]] = getelementptr i32, ptr [[TMP106]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP112:%.*]] = load i32, ptr [[TMP111]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP112]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP113:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP113]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP115]], ptr [[TMP114]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP116:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP118]], ptr [[TMP117]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP119:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP119]], ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP121:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP121]]) +; LOWERRAYTRACINGPIPELINE-CPS: 106: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP139]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP140]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP111]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP146:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP147:%.*]] = getelementptr inbounds i32, ptr [[TMP140]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP112:%.*]] = load i32, ptr [[TMP147]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP112]], ptr [[TMP146]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP113:%.*]] = getelementptr inbounds i32, ptr [[TMP146]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP115]], ptr [[TMP113]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP154:%.*]] = getelementptr inbounds i32, ptr [[TMP146]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP155:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP155]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP118]], ptr [[TMP154]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr 
[[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP120:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP120]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP121:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP122:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP121]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP123]], ptr [[TMP122]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP164:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP164]], ptr [[TMP165]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP166:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP167:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP166]], [8 x i32] poison, [10 x i32] [[TMP167]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 122: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP22]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS: 128: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP124:%.*]] = getelementptr i32, ptr [[TMP123]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP125:%.*]] = getelementptr i32, ptr [[TMP124]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP126]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = getelementptr i32, ptr [[TMP123]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP128:%.*]] = getelementptr i32, ptr [[TMP127]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP129:%.*]] = load i32, ptr [[TMP128]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP129]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP130:%.*]] = getelementptr i32, ptr [[TMP127]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP131:%.*]] = load i32, ptr [[TMP130]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP131]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP132:%.*]] = getelementptr i32, ptr [[TMP127]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP133]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP134:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP135:%.*]] = getelementptr 
inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP136:%.*]] = load i32, ptr [[TMP134]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP136]], ptr [[TMP135]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP137:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP138:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP139:%.*]] = load i32, ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP139]], ptr [[TMP138]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP140:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP140]], ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP142:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP142]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP130:%.*]] = load i32, ptr [[TMP169]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP130]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP175:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP176:%.*]] = getelementptr inbounds i32, ptr [[TMP169]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP176]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP133]], ptr [[TMP175]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP144:%.*]] = getelementptr inbounds i32, ptr [[TMP175]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP145:%.*]] = getelementptr inbounds i32, ptr [[TMP176]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP148:%.*]] = load i32, ptr [[TMP145]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP148]], ptr [[TMP144]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP183:%.*]] = getelementptr inbounds i32, ptr [[TMP175]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP184:%.*]] = getelementptr inbounds i32, ptr [[TMP176]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP149:%.*]] = load i32, ptr [[TMP184]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP149]], ptr [[TMP183]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP150:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP150]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP141:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP142:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP143:%.*]] = load i32, ptr [[TMP141]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP143]], ptr [[TMP142]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP192:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP193:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP192]], ptr [[TMP193]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP194:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP195:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP194]], [8 x i32] poison, [10 x i32] [[TMP195]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45:![0-9]+]] !continuation [[META46:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] 
[[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META44:![0-9]+]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 2 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP3]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) 
@lgc.cps.await__s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP9]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } (...) 
@lgc.cps.await__sl_s_struct.AnyHitTraversalDatasa8i32a30i32s(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], [32 x i32] poison, [30 x i32] [[TMP8]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr 
[[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] ; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP20:%.*]], label [[TMP22:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 20: -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP21]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP23:%.*]], label [[TMP26:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 21: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], [8 x i32] poison, [30 x i32] [[TMP25]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 22: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-CPS: 24: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP27]], [8 x i32] poison, [30 x i32] [[TMP28]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader2( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45]] !continuation [[META47:![0-9]+]] { -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META44]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr 
[[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I2:%.*]] = load float, ptr [[RESPTR_I]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP4]] to ptr -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP3]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGTPTR_I:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGT_I:%.*]] = load float, ptr [[ORIGTPTR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I2]], [[ORIGT_I]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISNOHIT_I]], label 
[[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: callAHit.i: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] (...) @lgc.cps.await__s_struct.AnyHitTraversalDatas(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP9]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } (...) 
@lgc.cps.await__sl_s_struct.AnyHitTraversalDatasa8i32a30i32s(i32 3, i32 8, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP7]], [32 x i32] poison, [30 x i32] [[TMP8]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 
4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] ; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP20:%.*]], label [[TMP22:%.*]] -; LOWERRAYTRACINGPIPELINE-CPS: 20: -; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP21]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP23:%.*]], label [[TMP26:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 21: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP24]], [8 x i32] poison, [30 x i32] [[TMP25]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE-CPS: 22: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP6]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-CPS: 24: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP27]], [8 x i32] poison, [30 x i32] [[TMP28]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META45]] !lgc.cps [[META41]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44]] !lgc.cps [[META40]] !continuation [[META47:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr 
[[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP10]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP16]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP22]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP36]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP37]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP38]], [33 x i32] poison, [10 x i32] [[TMP39]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; @@ -1717,75 +1712,90 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; REGISTERBUFFER-CPS-LABEL: define void @MyRayGen( ; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; 
REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 ; REGISTERBUFFER-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP7]], 5 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP8]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP6]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 4, i32 4, {} poison, i64 [[TMP6]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyRayGen.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [33 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { ; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, align 8 +; REGISTERBUFFER-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 2 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = load i32, 
ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP12]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], 
ptr [[TMP12]], i32 0, i32 0, i32 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 ; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 ; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 ; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP13]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 ; REGISTERBUFFER-CPS-NEXT: 
[[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 ; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 ; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP13]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -1795,106 +1805,147 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; REGISTERBUFFER-CPS-LABEL: define void @MyClosestHitShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x 
i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: ; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; 
REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = 
insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_012_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_012_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_012_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_012_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x 
float> [[TMP11]], float [[TMP8]], i64 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 -; REGISTERBUFFER-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; 
REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = fsub fast float 1.000000e+00, [[TMP9]] +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fsub fast float [[TMP10]], [[TMP11]] +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i64 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float 1.000000e+00, i64 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP21]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP17]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP18]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP19]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP20]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define void @MyAnyHitShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: ; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: store 
<3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0235_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; 
REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: store float 
[[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_4_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], 
ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_5_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_2_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_3_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, 
float [[TMP0]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT387:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP5]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0389_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0389_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0389_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0389_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 
0 +; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 @@ -1911,14 +1962,14 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0411_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTSROA_0411_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0411_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0411_0_VEC_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0411_4_VEC_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0411_8_VEC_EXTRACT]], i32 2 ; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 @@ -1942,7 +1993,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 ; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 ; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 
0 +; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 @@ -1959,718 +2010,2316 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP37:%.*]] -; REGISTERBUFFER-CPS: 14: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP26:%.*]] -; REGISTERBUFFER-CPS: 15: -; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) -; REGISTERBUFFER-CPS-NEXT: call 
void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[EXTRACT1]] +; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], 0.000000e+00 +; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], 1.000000e+00 +; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP12]], -1.000000e+00 +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP13]], label [[TMP16:%.*]], label [[TMP39:%.*]] +; REGISTERBUFFER-CPS: 16: +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP14]], label [[TMP17:%.*]], label [[TMP28:%.*]] +; REGISTERBUFFER-CPS: 17: +; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> 
[[DOTSROA_0_12_VEC_INSERT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0392_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = 
extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0392_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0392_0_VEC_INSERT]], float [[TMP26]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT391:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0392_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT391]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP27]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr 
[[DOTFCA_0_4_GEP63]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP65]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 
x float>, ptr [[DOTFCA_1_1_GEP66]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP]], align 4 ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 
[[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 26: -; REGISTERBUFFER-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP27]]) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP28]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP33]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; REGISTERBUFFER-CPS: 28: +; REGISTERBUFFER-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP29]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT43:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT43]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to 
float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP35]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP36]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] 
= load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 [[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> [[DOTFCA_0_2_LOAD87]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> [[DOTFCA_1_1_LOAD102]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0396_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP35]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0396_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0396_0_VEC_INSERT]], float [[TMP37]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT395:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0396_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT223:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT395]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP224:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP38]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT223]], ptr [[DOTFCA_0_GEP224]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD226:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP225]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT227:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD226]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP228:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD229:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP228]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT230:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT227]], <3 x float> [[DOTFCA_0_1_0_LOAD229]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP231:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD232:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP231]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT233:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT230]], <3 x float> [[DOTFCA_0_1_1_LOAD232]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD235:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP234]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT236:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT233]], float [[DOTFCA_0_1_2_LOAD235]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP237:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD238:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP237]], align 4 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT239:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT236]], i32 [[DOTFCA_0_1_3_LOAD238]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP240:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD241:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP240]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT242:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT239]], <3 x float> [[DOTFCA_0_2_LOAD241]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP243:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD244:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP243]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT245:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT242]], <3 x float> [[DOTFCA_0_3_LOAD244]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP246:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD247:%.*]] = load float, ptr [[DOTFCA_0_4_GEP246]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT248:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT245]], float [[DOTFCA_0_4_LOAD247]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP249:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD250:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP249]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT251:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT248]], i64 [[DOTFCA_0_5_LOAD250]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP252:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD253:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP252]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT254:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT251]], <3 x float> [[DOTFCA_1_0_LOAD253]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP255:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD256:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP255]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT257:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT254]], <3 x float> [[DOTFCA_1_1_LOAD256]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP258:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD259:%.*]] = load float, ptr [[DOTFCA_1_2_GEP258]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT260:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT257]], float [[DOTFCA_1_2_LOAD259]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP261:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD262:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP261]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT263:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT260]], i32 [[DOTFCA_1_3_LOAD262]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT61:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP30]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT64:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT61]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT67:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT64]], i32 
[[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT70:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT67]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT73:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT70]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT76:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT73]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT79:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT76]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT82:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT79]], i32 [[TMP31]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT85:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT82]], i32 [[TMP32]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT88:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT85]], i32 [[TMP33]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT263]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 37: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP13]], label [[TMP38:%.*]], label [[TMP57:%.*]] -; REGISTERBUFFER-CPS: 38: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP12]], label [[TMP39:%.*]], label [[TMP48:%.*]] ; REGISTERBUFFER-CPS: 39: -; REGISTERBUFFER-CPS-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP40]]) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP41]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP42:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP42]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP43]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT50]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP44]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP4]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP45]], i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[TMP46:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP46]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP47]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP142]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP15]], label [[TMP40:%.*]], label [[TMP59:%.*]] +; REGISTERBUFFER-CPS: 40: +; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP14]], label [[TMP41:%.*]], label [[TMP50:%.*]] +; REGISTERBUFFER-CPS: 41: +; REGISTERBUFFER-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP42]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT45:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT45]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP46:%.*]] = bitcast float 
[[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP6]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0400_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP47]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP7]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0400_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0400_0_VEC_INSERT]], float [[TMP48]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT399:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0400_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT264:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT399]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP265:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP49]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT264]], ptr [[DOTFCA_0_GEP265]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP266:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD267:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP266]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT268:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD267]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP269:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD270:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP269]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT271:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT268]], <3 x float> [[DOTFCA_0_1_0_LOAD270]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP272:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD273:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP272]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT274:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT271]], <3 x float> [[DOTFCA_0_1_1_LOAD273]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP275:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD276:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP275]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT277:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT274]], float [[DOTFCA_0_1_2_LOAD276]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP278:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD279:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP278]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT280:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT277]], i32 [[DOTFCA_0_1_3_LOAD279]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP281:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD282:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP281]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT283:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT280]], <3 x float> [[DOTFCA_0_2_LOAD282]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP284:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD285:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP284]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT286:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT283]], <3 x float> [[DOTFCA_0_3_LOAD285]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP287:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD288:%.*]] = load float, ptr [[DOTFCA_0_4_GEP287]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT286]], float [[DOTFCA_0_4_LOAD288]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP290:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD291:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP290]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT289]], i64 [[DOTFCA_0_5_LOAD291]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP293:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD294:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP293]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT292]], <3 x float> [[DOTFCA_1_0_LOAD294]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP296:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD297:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP296]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT298:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT295]], <3 x float> [[DOTFCA_1_1_LOAD297]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP299:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD300:%.*]] = load float, ptr [[DOTFCA_1_2_GEP299]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT298]], float [[DOTFCA_1_2_LOAD300]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP302:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD303:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP302]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT301]], i32 [[DOTFCA_1_3_LOAD303]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT91:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP43]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT94:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT91]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT97:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT94]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT100:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT97]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT103:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT100]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT106:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT103]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT109:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT106]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT112:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT109]], i32 [[TMP44]], 7 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT115:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT112]], i32 [[TMP45]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT118:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT115]], i32 [[TMP46]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT304]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 48: -; REGISTERBUFFER-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP49]]) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP50:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP50]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP51:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP51]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP52]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP53]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP4]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP54]], i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP55]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP56]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 +; REGISTERBUFFER-CPS: 50: +; REGISTERBUFFER-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP51]]) +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT47:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT47]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT56:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP6]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0404_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float 
[[TMP56]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP7]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0404_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0404_0_VEC_INSERT]], float [[TMP57]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT403:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0404_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT305:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT403]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP306:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP58]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT305]], ptr [[DOTFCA_0_GEP306]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP307:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD308:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP307]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT309:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD308]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP310:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD311:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP310]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT312:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT309]], <3 x float> [[DOTFCA_0_1_0_LOAD311]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP313:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD314:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP313]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT315:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT312]], <3 x float> [[DOTFCA_0_1_1_LOAD314]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP316:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD317:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP316]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT318:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT315]], float [[DOTFCA_0_1_2_LOAD317]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP319:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD320:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP319]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT321:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT318]], i32 [[DOTFCA_0_1_3_LOAD320]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP322:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD323:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP322]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT324:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT321]], <3 x float> [[DOTFCA_0_2_LOAD323]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP325:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD326:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP325]], align 4 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_INSERT327:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT324]], <3 x float> [[DOTFCA_0_3_LOAD326]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP328:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD329:%.*]] = load float, ptr [[DOTFCA_0_4_GEP328]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT330:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT327]], float [[DOTFCA_0_4_LOAD329]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP331:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD332:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP331]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT333:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT330]], i64 [[DOTFCA_0_5_LOAD332]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP334:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD335:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP334]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT336:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT333]], <3 x float> [[DOTFCA_1_0_LOAD335]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP337:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD338:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP337]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT339:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT336]], <3 x float> [[DOTFCA_1_1_LOAD338]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP340:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD341:%.*]] = load float, ptr [[DOTFCA_1_2_GEP340]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT342:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT339]], float [[DOTFCA_1_2_LOAD341]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP343:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD344:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP343]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT345:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT342]], i32 [[DOTFCA_1_3_LOAD344]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT121:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP52]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT124:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT121]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT127:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT124]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT130:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT127]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT133:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT130]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT136:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT133]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT139:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT136]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT142:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT139]], i32 [[TMP53]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT145:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT142]], i32 [[TMP54]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT148:%.*]] = insertvalue [10 x i32] 
[[DOTFCA_8_INSERT145]], i32 [[TMP55]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT345]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 57: +; REGISTERBUFFER-CPS: 59: ; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; REGISTERBUFFER-CPS-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP58:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP58]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP59]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP60]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP61]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP63:%.*]] = bitcast i32 [[TMP62]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP63]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT40:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT49:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP62:%.*]] = bitcast float 
[[DOTSROA_0_8_VEC_EXTRACT49]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT58:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP64]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP65]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP66]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP209]], align 
4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP224:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0408_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP65]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP66]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0408_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0408_0_VEC_INSERT]], float [[TMP67]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT407:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0408_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: 
[[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT346:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT407]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP347:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP68]], i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT346]], ptr [[DOTFCA_0_GEP347]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP348:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD349:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP348]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT350:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD349]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP351:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD352:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP351]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT353:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT350]], <3 x float> [[DOTFCA_0_1_0_LOAD352]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP354:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD355:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP354]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT356:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT353]], <3 x float> [[DOTFCA_0_1_1_LOAD355]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP357:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD358:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP357]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT359:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT356]], float [[DOTFCA_0_1_2_LOAD358]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP360:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD361:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP360]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT362:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT359]], i32 [[DOTFCA_0_1_3_LOAD361]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP363:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD364:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP363]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT365:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT362]], <3 x float> [[DOTFCA_0_2_LOAD364]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP366:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD367:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP366]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT368:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT365]], <3 x float> [[DOTFCA_0_3_LOAD367]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP369:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD370:%.*]] = load float, ptr [[DOTFCA_0_4_GEP369]], align 4 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_4_INSERT371:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT368]], float [[DOTFCA_0_4_LOAD370]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP372:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD373:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP372]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT374:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT371]], i64 [[DOTFCA_0_5_LOAD373]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP375:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD376:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP375]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT377:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT374]], <3 x float> [[DOTFCA_1_0_LOAD376]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP378:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD379:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP378]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT380:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT377]], <3 x float> [[DOTFCA_1_1_LOAD379]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP381:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD382:%.*]] = load float, ptr [[DOTFCA_1_2_GEP381]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT383:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT380]], float [[DOTFCA_1_2_LOAD382]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP384:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 1, i32 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD385:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP384]], align 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT386:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT383]], i32 [[DOTFCA_1_3_LOAD385]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT151:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP60]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT154:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT151]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT157:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT154]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT160:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT157]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT163:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT160]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT166:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT163]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT169:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT166]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT172:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT169]], i32 [[TMP61]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT175:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT172]], i32 [[TMP62]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT178:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT175]], i32 [[TMP63]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT386]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: ; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] 
[[PAYLOAD]], 10 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; REGISTERBUFFER-CPS-NEXT: 
[[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] 
poison, <3 x float> [[TMP1]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP2]], 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = 
extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] ; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; REGISTERBUFFER-CPS: callAHit.i: -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; 
REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP3]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; 
REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; REGISTERBUFFER-CPS: isEnd.i: ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP7]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 
x i32> poison, <3 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] ; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; REGISTERBUFFER-CPS: 9: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; REGISTERBUFFER-CPS: 8: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 
[[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 
[[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 10: +; REGISTERBUFFER-CPS: 9: ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { ; REGISTERBUFFER-CPS-NEXT: entryresume.0: ; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TMP3]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 
0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP6:%.*]] -; REGISTERBUFFER-CPS: 5: +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; REGISTERBUFFER-CPS: 7: ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 6: +; REGISTERBUFFER-CPS: 8: ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 
4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader2( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: ; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; 
REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP1]], 0 -; 
REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP2]], 3 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_3_INSERT]], 1 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 ; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] ; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; REGISTERBUFFER-CPS: callAHit.i: -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: 
[[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP3]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; 
REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader2.resume.0) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; REGISTERBUFFER-CPS: isEnd.i: ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP7]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x 
i32> poison, <3 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] ; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; REGISTERBUFFER-CPS: 9: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; REGISTERBUFFER-CPS: 8: +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 
[[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 
[[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 10: +; REGISTERBUFFER-CPS: 9: ; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { +; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { ; REGISTERBUFFER-CPS-NEXT: entryresume.0: ; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TMP3]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 
0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP6:%.*]] -; REGISTERBUFFER-CPS: 5: +; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; REGISTERBUFFER-CPS: 7: ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 6: +; REGISTERBUFFER-CPS: 8: ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 ; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 
4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; ; REGISTERBUFFER-CPS-LABEL: define void @MyMissShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { +; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { ; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 9 +; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 ; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), 
align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 +; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 ; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; REGISTERBUFFER-CPS-NEXT: unreachable ; ; +; POSTPROCESS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; POSTPROCESS-SAME: ) #[[ATTR0:[0-9]+]] { +; POSTPROCESS-NEXT: ret i32 0 +; +; +; POSTPROCESS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; POSTPROCESS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; POSTPROCESS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; POSTPROCESS-LABEL: define void @_cont_SetTriangleHitAttributes( +; POSTPROCESS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; POSTPROCESS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; POSTPROCESS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; POSTPROCESS-NEXT: ret void +; +; +; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-NEXT: ret i32 5 +; +; +; POSTPROCESS-LABEL: define i1 @_cont_IsEndSearch( +; POSTPROCESS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; POSTPROCESS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-NEXT: ret i1 [[ISEND]] +; +; +; POSTPROCESS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { +; POSTPROCESS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; 
POSTPROCESS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; POSTPROCESS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; POSTPROCESS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; POSTPROCESS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; POSTPROCESS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; POSTPROCESS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; POSTPROCESS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; POSTPROCESS-NEXT: 
[[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; POSTPROCESS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; POSTPROCESS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; POSTPROCESS-LABEL: define float @_cont_RayTCurrent( +; POSTPROCESS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; POSTPROCESS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; POSTPROCESS-NEXT: ret float [[RES]] +; +; +; POSTPROCESS-LABEL: define void @MyRayGen( +; POSTPROCESS-SAME: ) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 0, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-NEXT: [[TMP0:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE]], ptr 
@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP0]]) +; POSTPROCESS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP3]]) +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) +; POSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP5]], 5 +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = 
bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP10]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !33 +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @MyRayGen.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META35]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> 
[[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 +; POSTPROCESS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 +; POSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 +; POSTPROCESS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[SYSTEM_DATA_ALLOCA]], 
align 4 +; POSTPROCESS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 +; POSTPROCESS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 +; POSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) +; POSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP10]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; POSTPROCESS-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; POSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; POSTPROCESS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP11]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP12]], float [[TMP13]], float [[TMP14]], float [[TMP15]], i8 15) +; POSTPROCESS-NEXT: ret void +; POSTPROCESS: entryresume.0.split: +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: 
define void @MyClosestHitShader( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP7]], i32 2 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP10]], 0 +; POSTPROCESS-NEXT: [[DOTSROA_07_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_07_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_07_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_07_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float +; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = fsub fast float 1.000000e+00, [[TMP15]] +; POSTPROCESS-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP18:%.*]] = fsub fast float [[TMP16]], [[TMP17]] +; POSTPROCESS-NEXT: [[TMP19:%.*]] = insertelement <4 x float> undef, float [[TMP18]], i64 0 +; POSTPROCESS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float 
[[TMP15]], i64 1 +; POSTPROCESS-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP17]], i64 2 +; POSTPROCESS-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float 1.000000e+00, i64 3 +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 0 +; POSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 1 +; POSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 2 +; POSTPROCESS-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 3 +; POSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; POSTPROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 +; 
POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP28]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @MyAnyHitShader( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP11]]) +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT237:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]], 0 +; POSTPROCESS-NEXT: [[DOTSROA_0239_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT237]], i32 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0239_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTSROA_0239_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT237]], i32 1 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0239_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; POSTPROCESS-NEXT: call void 
@amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 +; POSTPROCESS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 +; POSTPROCESS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I1_FCA_3_INSERT]], 2 +; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 +; POSTPROCESS-NEXT: [[DOTSROA_0260_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0260_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[DOTSROA_0260_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0260_0_VEC_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0260_4_VEC_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0260_8_VEC_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 +; POSTPROCESS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; 
POSTPROCESS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 +; POSTPROCESS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 +; POSTPROCESS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; POSTPROCESS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 +; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue 
[[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 +; POSTPROCESS-NEXT: [[TMP18:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; POSTPROCESS-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP18]], [[EXTRACT1]] +; POSTPROCESS-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP19]], 0.000000e+00 +; POSTPROCESS-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP19]], 1.000000e+00 +; POSTPROCESS-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP19]], -1.000000e+00 +; POSTPROCESS-NEXT: br i1 [[TMP20]], label [[TMP23:%.*]], label [[TMP48:%.*]] +; POSTPROCESS: 23: +; POSTPROCESS-NEXT: br i1 [[TMP21]], label [[TMP24:%.*]], label [[TMP36:%.*]] +; POSTPROCESS: 24: +; POSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP25]]) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-NEXT: 
[[TMP29:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[TMP30:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP31:%.*]] = bitcast i32 [[TMP30]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP31]], i32 0 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP33]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT59:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP34]], i32 0, i32 0 +; POSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT59]], ptr [[DOTFCA_0_GEP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP60]], align 4 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP61]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP62]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP63]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP64]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] 
= load <3 x float>, ptr [[DOTFCA_0_2_GEP65]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP66]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP67]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP68]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP69:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP69]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP70:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 
x float>, ptr [[DOTFCA_1_1_GEP70]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP71]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP72:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP72]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; POSTPROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP35]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 36: +; POSTPROCESS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP37]]) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP38:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT43:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT43]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[TMP42:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 +; POSTPROCESS-NEXT: 
[[TMP43:%.*]] = bitcast i32 [[TMP42]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP43]], i32 0 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP44:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; POSTPROCESS-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP44]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP45]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT73:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP74:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP46]], i32 0, i32 0 +; POSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT73]], ptr [[DOTFCA_0_GEP74]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP75:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD76:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP75]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD76]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP78:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD79:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP78]], align 4 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_1_0_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT77]], <3 x float> [[DOTFCA_0_1_0_LOAD79]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP81:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD82:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP81]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT83:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT80]], <3 x float> [[DOTFCA_0_1_1_LOAD82]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP84:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD85:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP84]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT86:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT83]], float [[DOTFCA_0_1_2_LOAD85]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP87:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD88:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP87]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT89:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT86]], i32 [[DOTFCA_0_1_3_LOAD88]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP90:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD91:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP90]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT92:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT89]], <3 x float> [[DOTFCA_0_2_LOAD91]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP93:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD94:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP93]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT95:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT92]], <3 x float> [[DOTFCA_0_3_LOAD94]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP96:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD97:%.*]] = load float, ptr [[DOTFCA_0_4_GEP96]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT98:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT95]], float [[DOTFCA_0_4_LOAD97]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP99:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD100:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP99]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT101:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT98]], i64 [[DOTFCA_0_5_LOAD100]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP102:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD103:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP102]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT104:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT101]], <3 x float> [[DOTFCA_1_0_LOAD103]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP105:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD106:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP105]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT107:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT104]], <3 x float> [[DOTFCA_1_1_LOAD106]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP108:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD109:%.*]] = load float, ptr [[DOTFCA_1_2_GEP108]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT110:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT107]], float [[DOTFCA_1_2_LOAD109]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP111:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD112:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP111]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT113:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT110]], i32 [[DOTFCA_1_3_LOAD112]], 1, 3 +; POSTPROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP47]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT113]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 48: +; POSTPROCESS-NEXT: br i1 [[TMP22]], label [[TMP49:%.*]], label [[TMP70:%.*]] +; POSTPROCESS: 49: +; POSTPROCESS-NEXT: br i1 [[TMP21]], label [[TMP50:%.*]], label [[TMP60:%.*]] +; POSTPROCESS: 50: +; POSTPROCESS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP51]]) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT45:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT45]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP54]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP13]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP56]], i32 0 +; POSTPROCESS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP14]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP57]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT114:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP58]], i32 0, i32 0 +; POSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT114]], ptr [[DOTFCA_0_GEP115]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP116:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD117:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP116]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT118:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD117]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP119:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD120:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP119]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT121:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT118]], <3 x float> [[DOTFCA_0_1_0_LOAD120]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP122:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD123:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP122]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT124:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT121]], <3 x float> [[DOTFCA_0_1_1_LOAD123]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP125:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD126:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP125]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT127:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT124]], float [[DOTFCA_0_1_2_LOAD126]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP128:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD129:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP128]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT130:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT127]], i32 [[DOTFCA_0_1_3_LOAD129]], 0, 1, 3 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP131:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD132:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP131]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT133:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT130]], <3 x float> [[DOTFCA_0_2_LOAD132]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP134:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD135:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP134]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT136:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT133]], <3 x float> [[DOTFCA_0_3_LOAD135]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP137:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD138:%.*]] = load float, ptr [[DOTFCA_0_4_GEP137]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT139:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT136]], float [[DOTFCA_0_4_LOAD138]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP140:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD141:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP140]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT142:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT139]], i64 [[DOTFCA_0_5_LOAD141]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP143:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD144:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP143]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT145:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_5_INSERT142]], <3 x float> [[DOTFCA_1_0_LOAD144]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP146:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD147:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP146]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT148:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT145]], <3 x float> [[DOTFCA_1_1_LOAD147]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP149:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD150:%.*]] = load float, ptr [[DOTFCA_1_2_GEP149]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT151:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT148]], float [[DOTFCA_1_2_LOAD150]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP152:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD153:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP152]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT154:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT151]], i32 [[DOTFCA_1_3_LOAD153]], 1, 3 +; POSTPROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP59]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT154]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 60: +; POSTPROCESS-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP61]]) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP62]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT47:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT47]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT56:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-NEXT: [[TMP65:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP66:%.*]] = bitcast i32 [[TMP13]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP66]], i32 0 +; POSTPROCESS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP14]] to float +; 
POSTPROCESS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP67]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT155:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP68]], i32 0, i32 0 +; POSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT155]], ptr [[DOTFCA_0_GEP156]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP157:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD158:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP157]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT159:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD158]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP160:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD161:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP160]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT162:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT159]], <3 x float> [[DOTFCA_0_1_0_LOAD161]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP163:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD164:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP163]], align 4 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_1_1_INSERT165:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT162]], <3 x float> [[DOTFCA_0_1_1_LOAD164]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP166:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD167:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP166]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT168:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT165]], float [[DOTFCA_0_1_2_LOAD167]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP169:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD170:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP169]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT171:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT168]], i32 [[DOTFCA_0_1_3_LOAD170]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP172:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD173:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP172]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT174:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT171]], <3 x float> [[DOTFCA_0_2_LOAD173]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP175:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD176:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP175]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT177:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT174]], <3 x float> [[DOTFCA_0_3_LOAD176]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP178:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD179:%.*]] = load float, ptr [[DOTFCA_0_4_GEP178]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT180:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT177]], float [[DOTFCA_0_4_LOAD179]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP181:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD182:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP181]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT183:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT180]], i64 [[DOTFCA_0_5_LOAD182]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP184:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD185:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP184]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT186:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT183]], <3 x float> [[DOTFCA_1_0_LOAD185]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP187:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD188:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP187]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT189:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT186]], <3 x float> [[DOTFCA_1_1_LOAD188]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP190:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD191:%.*]] = load float, ptr [[DOTFCA_1_2_GEP190]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT192:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT189]], float [[DOTFCA_1_2_LOAD191]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP193:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD194:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP193]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT195:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT192]], i32 [[DOTFCA_1_3_LOAD194]], 1, 3 +; POSTPROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP69]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT195]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 70: +; POSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP71:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP71]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT40:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP72:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP72]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT49:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-NEXT: [[TMP73:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT49]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT58:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-NEXT: [[TMP74:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP74]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 
4 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[TMP75:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 +; POSTPROCESS-NEXT: [[TMP76:%.*]] = bitcast i32 [[TMP75]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0257_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP76]], i32 0 +; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP77:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; POSTPROCESS-NEXT: [[TMP78:%.*]] = bitcast i32 [[TMP77]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0257_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0257_0_VEC_INSERT]], float [[TMP78]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT256:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0257_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT196:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT256]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP197:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP79]], i32 0, i32 0 +; POSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT196]], ptr [[DOTFCA_0_GEP197]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP198:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD199:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP198]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT200:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD199]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_1_0_GEP201:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD202:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP201]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT203:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT200]], <3 x float> [[DOTFCA_0_1_0_LOAD202]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP204:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD205:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP204]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT206:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT203]], <3 x float> [[DOTFCA_0_1_1_LOAD205]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP207:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD208:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP207]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT209:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT206]], float [[DOTFCA_0_1_2_LOAD208]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP210:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD211:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP210]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT212:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT209]], i32 [[DOTFCA_0_1_3_LOAD211]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP213:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD214:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP213]], align 4 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_2_INSERT215:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT212]], <3 x float> [[DOTFCA_0_2_LOAD214]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP216:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD217:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP216]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT218:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT215]], <3 x float> [[DOTFCA_0_3_LOAD217]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP219:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD220:%.*]] = load float, ptr [[DOTFCA_0_4_GEP219]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT221:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT218]], float [[DOTFCA_0_4_LOAD220]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP222:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD223:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP222]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT224:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT221]], i64 [[DOTFCA_0_5_LOAD223]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD226:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP225]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT227:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT224]], <3 x float> [[DOTFCA_1_0_LOAD226]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP228:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD229:%.*]] = load <3 x 
float>, ptr [[DOTFCA_1_1_GEP228]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT230:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT227]], <3 x float> [[DOTFCA_1_1_LOAD229]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP231:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD232:%.*]] = load float, ptr [[DOTFCA_1_2_GEP231]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT233:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT230]], float [[DOTFCA_1_2_LOAD232]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD235:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP234]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT236:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT233]], i32 [[DOTFCA_1_3_LOAD235]], 1, 3 +; POSTPROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP80]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT236]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @MyIntersectionShader( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.stacksize [[META42:![0-9]+]] !continuation.state [[META42]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-NEXT: 
[[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; POSTPROCESS: callAHit.i: +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 
[[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: isEnd.i: +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP12]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP14:%.*]], label [[TMP18:%.*]] +; POSTPROCESS: 14: +; 
POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 18: +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 
[[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP21]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META41]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP3:%.*]], label [[TMP9:%.*]] +; POSTPROCESS: 3: +; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 9: +; POSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-NEXT: 
[[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @MyIntersectionShader2( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META42]] !continuation.state [[META42]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POSTPROCESS-NEXT: store i64 
[[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = 
extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; POSTPROCESS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; POSTPROCESS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; POSTPROCESS: callAHit.i: +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float 
[[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader2.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: isEnd.i: +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP12]], i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] +; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP14:%.*]], label [[TMP18:%.*]] +; POSTPROCESS: 14: 
+; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 18: +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 
[[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP21]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43]] { +; POSTPROCESS-NEXT: entryresume.0: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP3:%.*]], label [[TMP9:%.*]] +; POSTPROCESS: 3: +; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; 
POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; POSTPROCESS: 9: +; POSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 +; POSTPROCESS-NEXT: 
[[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: unreachable +; +; +; POSTPROCESS-LABEL: define void @MyMissShader( +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-NEXT: AllocaSpillBB: +; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = 
bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: unreachable +; +; ; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( ; POSTPROCESS-CPS-SAME: ) #[[ATTR0:[0-9]+]] { ; POSTPROCESS-CPS-NEXT: ret i32 0 @@ -2755,78 +4404,95 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, 
[[DX_TYPES_HANDLE]] [[TMP3]]) -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) -; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 ; POSTPROCESS-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP8]], 5 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: 
[[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP9]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP10]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP11]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP12]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) 
+; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP13]], i64 [[TMP14]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @MyRayGen.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [33 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, align 8 ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x 
i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = 
insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POSTPROCESS-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP12]], align 4 +; 
POSTPROCESS-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 1 ; POSTPROCESS-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 -; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 2 ; POSTPROCESS-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 ; POSTPROCESS-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 ; POSTPROCESS-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 ; POSTPROCESS-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 ; POSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 -; POSTPROCESS-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP13]], align 4 +; POSTPROCESS-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 1 ; POSTPROCESS-CPS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 -; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP4]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 2 ; POSTPROCESS-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 ; POSTPROCESS-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 ; 
POSTPROCESS-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 ; POSTPROCESS-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 ; POSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 -; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP13]]) -; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -2836,111 +4502,155 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; POSTPROCESS-CPS-LABEL: define void @MyClosestHitShader( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} 
[[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; 
POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; 
POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_012_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_012_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_012_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_012_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 
x float> [[TMP11]], float [[TMP8]], i64 2 -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = fsub fast float 1.000000e+00, [[TMP9]] +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = fsub fast float [[TMP10]], [[TMP11]] +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i64 0 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 1 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 2 +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float 1.000000e+00, i64 3 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP21]], i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr 
[[DOTFCA_0_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP17]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP18]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP19]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP20]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 0 
+; POSTPROCESS-CPS-NEXT: store i32 [[TMP23]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP24:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP24]], i32 [[TMP25]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define void @MyAnyHitShader( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr [[DOTFCA_0_5_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: store float 
[[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0235_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-CPS-NEXT: 
[[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: store float 
[[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_4_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, 
i32 5 +; POSTPROCESS-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_5_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_2_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_3_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 
[[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT387:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0389_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0389_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0389_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0389_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 @@ -2957,14 +4667,14 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 ; POSTPROCESS-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 -; POSTPROCESS-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0411_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0411_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: 
[[DOTSROA_0411_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0411_0_VEC_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0411_4_VEC_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0411_8_VEC_EXTRACT]], i32 2 ; POSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 @@ -2988,7 +4698,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 ; POSTPROCESS-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 ; POSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: 
[[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 @@ -3005,1789 +4715,1266 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 ; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 ; POSTPROCESS-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 -; POSTPROCESS-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP41:%.*]] -; POSTPROCESS-CPS: 14: -; POSTPROCESS-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP28:%.*]] -; POSTPROCESS-CPS: 15: -; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @REGISTERS, 
align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 
-; POSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[EXTRACT1]] +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], 0.000000e+00 +; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], 1.000000e+00 +; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP12]], -1.000000e+00 +; POSTPROCESS-CPS-NEXT: br i1 [[TMP13]], label [[TMP16:%.*]], label [[TMP47:%.*]] +; POSTPROCESS-CPS: 16: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP14]], label [[TMP17:%.*]], label [[TMP32:%.*]] +; POSTPROCESS-CPS: 17: +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: 
[[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0392_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0392_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0392_0_VEC_INSERT]], float [[TMP26]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT391:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0392_4_VEC_INSERT]], 0 
+; POSTPROCESS-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT391]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP27]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 
4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP63]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP65]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP66]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP]], align 4 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP27]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP29]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP30]], i32 [[TMP31]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 28: -; POSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP29]]) -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP32]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 
[[TMP33]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP35]], i32 0 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP37]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP38]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 
[[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> [[DOTFCA_0_2_LOAD87]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> [[DOTFCA_1_1_LOAD102]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP39:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP40]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) +; POSTPROCESS-CPS: 32: +; POSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP33]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP35:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT43:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT43]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP37:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP38:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP39:%.*]] = bitcast i32 [[TMP38]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0396_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP39]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP40:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP41:%.*]] = bitcast i32 [[TMP40]] to float +; 
POSTPROCESS-CPS-NEXT: [[DOTSROA_0396_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0396_0_VEC_INSERT]], float [[TMP41]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT395:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0396_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT223:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT395]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP224:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP42]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT223]], ptr [[DOTFCA_0_GEP224]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD226:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP225]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT227:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD226]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP228:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD229:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP228]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT230:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT227]], <3 x float> [[DOTFCA_0_1_0_LOAD229]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP231:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD232:%.*]] = load <3 x float>, ptr 
[[DOTFCA_0_1_1_GEP231]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT233:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT230]], <3 x float> [[DOTFCA_0_1_1_LOAD232]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD235:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP234]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT236:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT233]], float [[DOTFCA_0_1_2_LOAD235]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP237:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD238:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP237]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT239:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT236]], i32 [[DOTFCA_0_1_3_LOAD238]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP240:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD241:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP240]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT242:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT239]], <3 x float> [[DOTFCA_0_2_LOAD241]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP243:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD244:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP243]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT245:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT242]], <3 x float> [[DOTFCA_0_3_LOAD244]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP246:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD247:%.*]] = load float, ptr [[DOTFCA_0_4_GEP246]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT248:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT245]], float [[DOTFCA_0_4_LOAD247]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP249:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD250:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP249]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT251:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT248]], i64 [[DOTFCA_0_5_LOAD250]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP252:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD253:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP252]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT254:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT251]], <3 x float> [[DOTFCA_1_0_LOAD253]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP255:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD256:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP255]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT257:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT254]], <3 x float> [[DOTFCA_1_1_LOAD256]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP258:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD259:%.*]] = load float, ptr [[DOTFCA_1_2_GEP258]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT260:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_1_1_INSERT257]], float [[DOTFCA_1_2_LOAD259]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP261:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD262:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP261]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT263:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT260]], i32 [[DOTFCA_1_3_LOAD262]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT61:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP34]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT64:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT61]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT67:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT64]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT70:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT67]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT73:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT70]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT76:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT73]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT79:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT76]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT82:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT79]], i32 [[TMP35]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT85:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT82]], i32 [[TMP36]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT88:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT85]], i32 [[TMP37]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP43:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP44]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; 
POSTPROCESS-CPS-NEXT: [[TMP46:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP45]], i32 [[TMP46]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT263]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 41: -; POSTPROCESS-CPS-NEXT: br i1 [[TMP13]], label [[TMP42:%.*]], label [[TMP65:%.*]] -; POSTPROCESS-CPS: 42: -; POSTPROCESS-CPS-NEXT: br i1 [[TMP12]], label [[TMP43:%.*]], label [[TMP54:%.*]] -; POSTPROCESS-CPS: 43: -; POSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP44]]) -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP45]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP46]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP47:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP47]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 
0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP48:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT50]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP48]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP49:%.*]] = bitcast i32 [[TMP4]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP49]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP50:%.*]] = bitcast i32 [[TMP5]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP50]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP51]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP142]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP52:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP53]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) +; POSTPROCESS-CPS: 47: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP15]], label [[TMP48:%.*]], label [[TMP75:%.*]] +; POSTPROCESS-CPS: 48: +; POSTPROCESS-CPS-NEXT: br i1 [[TMP14]], label [[TMP49:%.*]], label [[TMP62:%.*]] +; POSTPROCESS-CPS: 49: +; POSTPROCESS-CPS-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP50]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP51:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT45:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT45]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP6]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0400_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP55]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0400_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0400_0_VEC_INSERT]], float [[TMP56]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT399:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> 
[[DOTSROA_0400_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT264:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT399]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP265:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP57]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT264]], ptr [[DOTFCA_0_GEP265]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP266:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD267:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP266]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT268:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD267]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP269:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD270:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP269]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT271:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT268]], <3 x float> [[DOTFCA_0_1_0_LOAD270]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP272:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD273:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP272]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT274:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT271]], <3 x float> [[DOTFCA_0_1_1_LOAD273]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP275:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD276:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP275]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT277:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT274]], float [[DOTFCA_0_1_2_LOAD276]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP278:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD279:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP278]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT280:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT277]], i32 [[DOTFCA_0_1_3_LOAD279]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP281:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD282:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP281]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT283:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT280]], <3 x float> [[DOTFCA_0_2_LOAD282]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP284:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD285:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP284]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT286:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT283]], <3 x float> [[DOTFCA_0_3_LOAD285]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP287:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD288:%.*]] = load float, ptr [[DOTFCA_0_4_GEP287]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT289:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT286]], float [[DOTFCA_0_4_LOAD288]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP290:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD291:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP290]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT289]], i64 [[DOTFCA_0_5_LOAD291]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP293:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD294:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP293]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT292]], <3 x float> [[DOTFCA_1_0_LOAD294]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP296:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD297:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP296]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT295]], <3 x float> [[DOTFCA_1_1_LOAD297]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP299:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD300:%.*]] = load float, ptr [[DOTFCA_1_2_GEP299]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT298]], float [[DOTFCA_1_2_LOAD300]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP302:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD303:%.*]] = load i32, 
ptr [[DOTFCA_1_3_GEP302]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT301]], i32 [[DOTFCA_1_3_LOAD303]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT91:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP51]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT94:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT91]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT97:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT94]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT100:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT97]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT103:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT100]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT106:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT103]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT109:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT106]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT112:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT109]], i32 [[TMP52]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT115:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT112]], i32 [[TMP53]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT118:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT115]], i32 [[TMP54]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP58:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP59]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP60:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP60]], i32 [[TMP61]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT304]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 54: -; POSTPROCESS-CPS-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP55]]) -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP56:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP56]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP57:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP57]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP58:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP58]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP59]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[TMP60:%.*]] = bitcast i32 [[TMP4]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP60]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP61:%.*]] = bitcast i32 [[TMP5]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP61]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP62]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 
-; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP63:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP64]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) +; POSTPROCESS-CPS: 62: +; POSTPROCESS-CPS-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP63]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP65:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT47:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT47]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT56:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP67:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP68:%.*]] = bitcast i32 [[TMP6]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0404_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP68]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP69:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0404_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0404_0_VEC_INSERT]], float [[TMP69]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT403:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0404_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_EXTRACT305:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT403]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP306:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP70]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT305]], ptr [[DOTFCA_0_GEP306]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP307:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD308:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP307]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT309:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD308]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP310:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD311:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP310]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT312:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT309]], <3 x float> [[DOTFCA_0_1_0_LOAD311]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP313:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD314:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP313]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT315:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT312]], <3 x float> [[DOTFCA_0_1_1_LOAD314]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP316:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD317:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP316]], align 4 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT318:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT315]], float [[DOTFCA_0_1_2_LOAD317]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP319:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD320:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP319]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT321:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT318]], i32 [[DOTFCA_0_1_3_LOAD320]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP322:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD323:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP322]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT324:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT321]], <3 x float> [[DOTFCA_0_2_LOAD323]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP325:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD326:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP325]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT327:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT324]], <3 x float> [[DOTFCA_0_3_LOAD326]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP328:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD329:%.*]] = load float, ptr [[DOTFCA_0_4_GEP328]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT330:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT327]], float [[DOTFCA_0_4_LOAD329]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP331:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD332:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP331]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT333:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT330]], i64 [[DOTFCA_0_5_LOAD332]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP334:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD335:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP334]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT336:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT333]], <3 x float> [[DOTFCA_1_0_LOAD335]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP337:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD338:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP337]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT339:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT336]], <3 x float> [[DOTFCA_1_1_LOAD338]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP340:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD341:%.*]] = load float, ptr [[DOTFCA_1_2_GEP340]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT342:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT339]], float [[DOTFCA_1_2_LOAD341]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP343:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD344:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP343]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT345:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT342]], i32 [[DOTFCA_1_3_LOAD344]], 1, 3 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT121:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP64]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT124:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT121]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT127:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT124]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT130:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT127]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT133:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT130]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT136:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT133]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT139:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT136]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT142:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT139]], i32 [[TMP65]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT145:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT142]], i32 [[TMP66]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT148:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT145]], i32 [[TMP67]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP71:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP72]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP73:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP74:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP73]], i32 [[TMP74]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT345]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 65: +; POSTPROCESS-CPS: 75: ; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP66]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP67:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP67]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP68:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP68]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP69:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP69]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP70:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP71:%.*]] = bitcast i32 [[TMP70]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP71]], i32 0 -; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP72:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 -; POSTPROCESS-CPS-NEXT: [[TMP73:%.*]] = bitcast i32 [[TMP72]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP73]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP74]], i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] 
= load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP209]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP224:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP75:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP76]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP76:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT40:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP77:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT49:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP78:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT49]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT58:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP79:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP80:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP81:%.*]] = bitcast i32 [[TMP80]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0408_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP81]], i32 0 +; POSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP82:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; POSTPROCESS-CPS-NEXT: [[TMP83:%.*]] = bitcast i32 [[TMP82]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0408_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0408_0_VEC_INSERT]], float [[TMP83]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT407:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, 
<2 x float> [[DOTSROA_0408_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT346:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT407]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP347:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP84]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT346]], ptr [[DOTFCA_0_GEP347]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP348:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD349:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP348]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT350:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD349]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP351:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD352:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP351]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT353:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT350]], <3 x float> [[DOTFCA_0_1_0_LOAD352]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_GEP354:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_LOAD355:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP354]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT356:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT353]], <3 x float> [[DOTFCA_0_1_1_LOAD355]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_GEP357:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_LOAD358:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP357]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT359:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT356]], float [[DOTFCA_0_1_2_LOAD358]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_GEP360:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_LOAD361:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP360]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT362:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT359]], i32 [[DOTFCA_0_1_3_LOAD361]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_GEP363:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_LOAD364:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP363]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT365:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT362]], <3 x float> [[DOTFCA_0_2_LOAD364]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_GEP366:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_LOAD367:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP366]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT368:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT365]], <3 x float> [[DOTFCA_0_3_LOAD367]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_GEP369:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_LOAD370:%.*]] = load float, ptr [[DOTFCA_0_4_GEP369]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT371:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT368]], float [[DOTFCA_0_4_LOAD370]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_GEP372:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_LOAD373:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP372]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT374:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT371]], i64 [[DOTFCA_0_5_LOAD373]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP375:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD376:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP375]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT377:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT374]], <3 x float> [[DOTFCA_1_0_LOAD376]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP378:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD379:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP378]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT380:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT377]], <3 x float> [[DOTFCA_1_1_LOAD379]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_GEP381:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_LOAD382:%.*]] = load float, ptr [[DOTFCA_1_2_GEP381]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT383:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT380]], float [[DOTFCA_1_2_LOAD382]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_GEP384:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_LOAD385:%.*]] 
= load i32, ptr [[DOTFCA_1_3_GEP384]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT386:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT383]], i32 [[DOTFCA_1_3_LOAD385]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT151:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP76]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT154:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT151]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT157:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT154]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT160:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT157]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT163:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT160]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT166:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT163]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT169:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT166]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT172:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT169]], i32 [[TMP77]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT175:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT172]], i32 [[TMP78]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT178:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT175]], i32 [[TMP79]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP85:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP86]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP87:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP88:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP87]], i32 [[TMP88]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT386]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define void @MyIntersectionShader( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 -; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = 
extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP0]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue 
[30 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; 
POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = 
extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; 
POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] ; POSTPROCESS-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; POSTPROCESS-CPS: callAHit.i: -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 
[[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: 
[[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 
[[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x 
i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: isEnd.i: ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP11]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> 
[[DOTFCA_0_0_0_0_EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP11]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] ; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP16:%.*]] +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP18:%.*]] ; POSTPROCESS-CPS: 13: -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 
[[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float 
[[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 ; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 16: +; POSTPROCESS-CPS: 18: ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP21]], i32 [[TMP22]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; POSTPROCESS-CPS-NEXT: 
[[TMP7:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 3 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP6:%.*]], label [[TMP11:%.*]] -; POSTPROCESS-CPS: 6: -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP7]], i32 0 -; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 11: -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP12]], i32 0 -; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP13]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP15:%.*]] +; POSTPROCESS-CPS: 8: +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 15: +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP17]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 
[[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define void @MyIntersectionShader2( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 -; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP0]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; POSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] 
[[PAYLOAD]], 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POSTPROCESS-CPS-NEXT: 
[[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP5]], 0 -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; 
POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 ; POSTPROCESS-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] +; POSTPROCESS-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] ; POSTPROCESS-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] ; POSTPROCESS-CPS: callAHit.i: -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 
[[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: 
[[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 
[[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x 
i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader2.resume.0 to i64)) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: isEnd.i: ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP11]], i32 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> -; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] 
+; POSTPROCESS-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP11]], i32 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> poison, <3 x i32> +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] ; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP16:%.*]] +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP13:%.*]], label [[TMP18:%.*]] ; POSTPROCESS-CPS: 13: -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 
1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 ; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 16: +; POSTPROCESS-CPS: 18: ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP21]], i32 [[TMP22]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; POSTPROCESS-CPS-NEXT: 
[[TMP7:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP7]], 1, 3 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP6:%.*]], label [[TMP11:%.*]] -; POSTPROCESS-CPS: 6: -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP7]], i32 0 -; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 11: -; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP12]], i32 0 -; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP13]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP15:%.*]] +; POSTPROCESS-CPS: 8: +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: 
[[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) +; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: unreachable +; POSTPROCESS-CPS: 15: +; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 +; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP17]], align 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 
[[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -8 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define void @MyMissShader( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-CPS-NEXT: 
[[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; POSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i32 7) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0 +; POSTPROCESS-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP10]], i32 [[TMP11]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] ; POSTPROCESS-CPS-NEXT: unreachable ; -; -; SROA-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( -; SROA-CPS-SAME: ) #[[ATTR0:[0-9]+]] { -; SROA-CPS-NEXT: ret i32 0 -; -; -; SROA-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( -; SROA-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { -; SROA-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 -; SROA-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 -; SROA-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] -; -; -; SROA-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( -; SROA-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { -; SROA-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 -; SROA-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 -; SROA-CPS-NEXT: ret void -; -; -; SROA-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; SROA-CPS-SAME: ptr [[DATA:%.*]]) { -; SROA-CPS-NEXT: ret i32 5 -; -; -; SROA-CPS-LABEL: define i1 @_cont_IsEndSearch( -; SROA-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { -; SROA-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() -; SROA-CPS-NEXT: ret i1 [[ISEND]] -; -; -; SROA-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( -; SROA-CPS-SAME: ptr [[DATA:%.*]]) { -; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 -; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 -; SROA-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 -; SROA-CPS-NEXT: 
[[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 -; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 -; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 -; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 -; SROA-CPS-NEXT: ret <3 x i32> [[VAL_2]] -; -; -; SROA-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( -; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 -; SROA-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; SROA-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; SROA-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; SROA-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( -; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; SROA-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; SROA-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], 
align 4 -; SROA-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; SROA-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; SROA-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; SROA-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; SROA-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; SROA-CPS-LABEL: define float @_cont_RayTCurrent( -; SROA-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; SROA-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 -; SROA-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 -; SROA-CPS-NEXT: ret float [[RES]] -; -; -; SROA-CPS-LABEL: define void @MyRayGen( -; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META22]] !continuation [[META35:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; SROA-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; SROA-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; SROA-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; SROA-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; SROA-CPS-NEXT: 
[[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) -; SROA-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; SROA-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; SROA-CPS-NEXT: [[TMP8:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyRayGen.resume.0) -; SROA-CPS-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -; SROA-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 -; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP10]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 -; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; SROA-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 
x float> zeroinitializer, i32 3 -; SROA-CPS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 4, {} poison, i32 [[TMP8]], i32 5), !continuation.returnedRegistercount !33, !continuation.registercount [[META33:![0-9]+]] -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define dso_local void @MyRayGen.resume.0( -; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META35]] { -; SROA-CPS-NEXT: entryresume.0: -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 -; SROA-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0 -; SROA-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 -; SROA-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; 
SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 -; SROA-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP11]], i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; SROA-CPS-NEXT: [[TMP12:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT2:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT4:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT6:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[DOTSROA_0_0_VEC_EXTRACT2]], i32 0 -; SROA-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[DOTSROA_0_4_VEC_EXTRACT4]], i32 1 -; SROA-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[DOTSROA_0_8_VEC_EXTRACT6]], i32 2 -; SROA-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[DOTFCA_0_EXTRACT]], i32 2 -; 
SROA-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[DOTSROA_0_0_VEC_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[DOTSROA_0_4_VEC_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[DOTSROA_0_8_VEC_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 -; SROA-CPS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP12]]) -; SROA-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP13]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) -; SROA-CPS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 -; SROA-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 -; SROA-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 -; SROA-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 -; SROA-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP14]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP15]], float [[TMP16]], float [[TMP17]], float [[TMP18]], i8 15) -; SROA-CPS-NEXT: ret void -; -; -; SROA-CPS-LABEL: define void @MyClosestHitShader( -; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !lgc.cps [[META37:![0-9]+]] !continuation [[META38:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; SROA-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 -; SROA-CPS-NEXT: [[TMP1:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 -; SROA-CPS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; SROA-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; SROA-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; SROA-CPS-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; SROA-CPS-NEXT: [[TMP7:%.*]] = fsub fast float 1.000000e+00, [[TMP6]] -; SROA-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; SROA-CPS-NEXT: [[TMP9:%.*]] = fsub fast float [[TMP7]], [[TMP8]] -; SROA-CPS-NEXT: [[TMP10:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i64 0 -; SROA-CPS-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i64 1 -; SROA-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 2 -; SROA-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float 1.000000e+00, i64 3 -; SROA-CPS-NEXT: 
[[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 -; SROA-CPS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP14]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 -; SROA-CPS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 2 -; SROA-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP13]], i32 3 -; SROA-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 -; SROA-CPS-NEXT: [[TMP19:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define void @MyAnyHitShader( -; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !lgc.cps [[META36]] !continuation [[META40:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_1_1_EXTRACT]], ptr [[DOTFCA_0_1_1_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: store float [[DOTFCA_0_1_2_EXTRACT]], ptr [[DOTFCA_0_1_2_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: store i32 [[DOTFCA_0_1_3_EXTRACT]], ptr [[DOTFCA_0_1_3_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_2_EXTRACT]], ptr [[DOTFCA_0_2_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_0_3_EXTRACT]], ptr [[DOTFCA_0_3_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: store float [[DOTFCA_0_4_EXTRACT]], ptr [[DOTFCA_0_4_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: store i64 [[DOTFCA_0_5_EXTRACT]], ptr 
[[DOTFCA_0_5_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: store <3 x float> [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: store float [[DOTFCA_1_2_EXTRACT]], ptr [[DOTFCA_1_2_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 -; SROA-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[TMP2]]) -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT233:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 -; SROA-CPS-NEXT: [[DOTSROA_0235_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 0 -; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0235_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: 
[[DOTSROA_0235_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT233]], i32 1 -; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0235_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; SROA-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 -; SROA-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; SROA-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 -; SROA-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; SROA-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 -; SROA-CPS-NEXT: 
[[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 -; SROA-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 -; SROA-CPS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[DOTSROA_0256_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0256_0_VEC_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0256_4_VEC_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0256_8_VEC_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 -; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; SROA-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 
-; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 -; SROA-CPS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; SROA-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 -; SROA-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; SROA-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; SROA-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float 
[[DOTSROA_1_20_VEC_EXTRACT]], i32 2 -; SROA-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; SROA-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; SROA-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 -; SROA-CPS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; SROA-CPS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 -; SROA-CPS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; SROA-CPS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 -; SROA-CPS-NEXT: 
[[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 -; SROA-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 -; SROA-CPS-NEXT: [[TMP9:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] -; SROA-CPS-NEXT: [[TMP10:%.*]] = fadd fast float [[TMP9]], [[EXTRACT1]] -; SROA-CPS-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], 0.000000e+00 -; SROA-CPS-NEXT: [[TMP12:%.*]] = fcmp fast ogt float [[TMP10]], 1.000000e+00 -; SROA-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP10]], -1.000000e+00 -; SROA-CPS-NEXT: br i1 [[TMP11]], label [[TMP14:%.*]], label [[TMP39:%.*]] -; SROA-CPS: 14: -; SROA-CPS-NEXT: br i1 [[TMP12]], label [[TMP15:%.*]], label [[TMP27:%.*]] -; SROA-CPS: 15: -; SROA-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP16]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP17]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 2 -; SROA-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP19]], ptr addrspace(20) 
addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> undef, i32 3 -; SROA-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float -; SROA-CPS-NEXT: [[DOTSROA_0237_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP22]], i32 0 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; SROA-CPS-NEXT: [[DOTSROA_0237_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0237_0_VEC_INSERT]], float [[TMP24]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0237_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT55:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP25]], i32 0, i32 0 -; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT55]], ptr [[DOTFCA_0_GEP]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP56:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP56]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP57]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP58]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP59]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: 
[[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP60]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP61]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP62]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP63]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP64]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr 
[[DOTFCA_1_0_GEP65]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP66]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP67]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP68]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 -; SROA-CPS-NEXT: [[TMP26:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 27: -; SROA-CPS-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP28]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT24:%.*]] = extractelement <4 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT24]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP29]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT32:%.*]] = extractelement <4 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT32]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP30]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT40:%.*]] = extractelement <4 x float> undef, i32 2 -; SROA-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT40]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP31]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT48:%.*]] = extractelement <4 x float> undef, i32 3 -; SROA-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT48]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP32]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 -; SROA-CPS-NEXT: [[TMP34:%.*]] = bitcast i32 [[TMP33]] to float -; SROA-CPS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP34]], i32 0 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP35:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 -; SROA-CPS-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP35]] to float -; SROA-CPS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP36]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT240:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT69:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT240]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP37]], i32 0, i32 0 -; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT69]], ptr [[DOTFCA_0_GEP70]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD72:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP71]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT73:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD72]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP74:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD75:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP74]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT76:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT73]], <3 x float> [[DOTFCA_0_1_0_LOAD75]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP77:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD78:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP77]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT79:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT76]], <3 x float> [[DOTFCA_0_1_1_LOAD78]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP80:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD81:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP80]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT82:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT79]], float [[DOTFCA_0_1_2_LOAD81]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP83:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD84:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP83]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT85:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT82]], i32 [[DOTFCA_0_1_3_LOAD84]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP86:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD87:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP86]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT88:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT85]], <3 x float> 
[[DOTFCA_0_2_LOAD87]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP89:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD90:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP89]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT91:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT88]], <3 x float> [[DOTFCA_0_3_LOAD90]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP92:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD93:%.*]] = load float, ptr [[DOTFCA_0_4_GEP92]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT94:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT91]], float [[DOTFCA_0_4_LOAD93]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP95:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD96:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP95]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT97:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT94]], i64 [[DOTFCA_0_5_LOAD96]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP98:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD99:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP98]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT100:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT97]], <3 x float> [[DOTFCA_1_0_LOAD99]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP101:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD102:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP101]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT103:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT100]], <3 x float> 
[[DOTFCA_1_1_LOAD102]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP104:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD105:%.*]] = load float, ptr [[DOTFCA_1_2_GEP104]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT106:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT103]], float [[DOTFCA_1_2_LOAD105]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD108:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP107]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT109:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT106]], i32 [[DOTFCA_1_3_LOAD108]], 1, 3 -; SROA-CPS-NEXT: [[TMP38:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT109]]) -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 39: -; SROA-CPS-NEXT: br i1 [[TMP13]], label [[TMP40:%.*]], label [[TMP61:%.*]] -; SROA-CPS: 40: -; SROA-CPS-NEXT: br i1 [[TMP12]], label [[TMP41:%.*]], label [[TMP51:%.*]] -; SROA-CPS: 41: -; SROA-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP42]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP43]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP44]], ptr addrspace(20) addrspacecast (ptr 
getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> undef, i32 2 -; SROA-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP45]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT50:%.*]] = extractelement <4 x float> undef, i32 3 -; SROA-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT50]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP46]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP4]] to float -; SROA-CPS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP47]], i32 0 -; SROA-CPS-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP5]] to float -; SROA-CPS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP48]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT110:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT244]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP111:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr 
[[TMP49]], i32 0, i32 0 -; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT110]], ptr [[DOTFCA_0_GEP111]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP112:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD113:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP112]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT114:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD113]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP115:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD116:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP115]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT117:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT114]], <3 x float> [[DOTFCA_0_1_0_LOAD116]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD119:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP118]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT120:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT117]], <3 x float> [[DOTFCA_0_1_1_LOAD119]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP121:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD122:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP121]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT123:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT120]], float [[DOTFCA_0_1_2_LOAD122]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP124:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: 
[[DOTFCA_0_1_3_LOAD125:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP124]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT126:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT123]], i32 [[DOTFCA_0_1_3_LOAD125]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP127:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD128:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP127]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT129:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT126]], <3 x float> [[DOTFCA_0_2_LOAD128]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP130:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD131:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP130]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT132:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT129]], <3 x float> [[DOTFCA_0_3_LOAD131]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP133:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD134:%.*]] = load float, ptr [[DOTFCA_0_4_GEP133]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT135:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT132]], float [[DOTFCA_0_4_LOAD134]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP136:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD137:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP136]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT138:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT135]], i64 [[DOTFCA_0_5_LOAD137]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP139:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; 
SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD140:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP139]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT141:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT138]], <3 x float> [[DOTFCA_1_0_LOAD140]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP142:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD143:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP142]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT144:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT141]], <3 x float> [[DOTFCA_1_1_LOAD143]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD146:%.*]] = load float, ptr [[DOTFCA_1_2_GEP145]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT147:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT144]], float [[DOTFCA_1_2_LOAD146]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP148:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD149:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP148]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT150:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT147]], i32 [[DOTFCA_1_3_LOAD149]], 1, 3 -; SROA-CPS-NEXT: [[TMP50:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT150]]) -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 51: -; SROA-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP52]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP53]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP54]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> undef, i32 2 -; SROA-CPS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP55]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> undef, i32 3 -; SROA-CPS-NEXT: [[TMP56:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP56]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[TMP57:%.*]] = 
bitcast i32 [[TMP4]] to float -; SROA-CPS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP57]], i32 0 -; SROA-CPS-NEXT: [[TMP58:%.*]] = bitcast i32 [[TMP5]] to float -; SROA-CPS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP58]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT151:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT248]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP152:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP59]], i32 0, i32 0 -; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT151]], ptr [[DOTFCA_0_GEP152]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP153:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD154:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP153]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT155:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD154]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP156:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD157:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP156]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT158:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT155]], <3 x float> [[DOTFCA_0_1_0_LOAD157]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP159:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD160:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP159]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT161:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT158]], <3 x float> [[DOTFCA_0_1_1_LOAD160]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP162:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD163:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP162]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT164:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT161]], float [[DOTFCA_0_1_2_LOAD163]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP165:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD166:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP165]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT167:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT164]], i32 [[DOTFCA_0_1_3_LOAD166]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_GEP168:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD169:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP168]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT170:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT167]], <3 x float> [[DOTFCA_0_2_LOAD169]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP171:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD172:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP171]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT173:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT170]], <3 x float> [[DOTFCA_0_3_LOAD172]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP174:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD175:%.*]] = load float, ptr [[DOTFCA_0_4_GEP174]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT176:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT173]], float [[DOTFCA_0_4_LOAD175]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP177:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD178:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP177]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT179:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT176]], i64 [[DOTFCA_0_5_LOAD178]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP180:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD181:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP180]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT182:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT179]], <3 x float> [[DOTFCA_1_0_LOAD181]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_GEP183:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD184:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP183]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT185:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT182]], <3 x float> [[DOTFCA_1_1_LOAD184]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP186:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD187:%.*]] = load float, ptr [[DOTFCA_1_2_GEP186]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT188:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT185]], float [[DOTFCA_1_2_LOAD187]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP189:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD190:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP189]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT191:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT188]], i32 [[DOTFCA_1_3_LOAD190]], 1, 3 -; SROA-CPS-NEXT: [[TMP60:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT191]]) -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 61: -; SROA-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP62]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP63]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> undef, i32 2 -; SROA-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP64]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> undef, i32 3 -; 
SROA-CPS-NEXT: [[TMP65:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP65]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 -; SROA-CPS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP66]] to float -; SROA-CPS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP67]], i32 0 -; SROA-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP68:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 -; SROA-CPS-NEXT: [[TMP69:%.*]] = bitcast i32 [[TMP68]] to float -; SROA-CPS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP69]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT192:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT252]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_GEP193:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP70]], i32 0, i32 0 -; SROA-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT192]], ptr [[DOTFCA_0_GEP193]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP194:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, 
i32 0, i32 0, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD195:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP194]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT196:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD195]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_GEP197:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_LOAD198:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP197]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT199:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT196]], <3 x float> [[DOTFCA_0_1_0_LOAD198]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_GEP200:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_LOAD201:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP200]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT202:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT199]], <3 x float> [[DOTFCA_0_1_1_LOAD201]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_GEP203:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_LOAD204:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP203]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT205:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT202]], float [[DOTFCA_0_1_2_LOAD204]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_GEP206:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_LOAD207:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP206]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT208:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT205]], i32 [[DOTFCA_0_1_3_LOAD207]], 0, 1, 3 -; SROA-CPS-NEXT: 
[[DOTFCA_0_2_GEP209:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_0_2_LOAD210:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP209]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT211:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT208]], <3 x float> [[DOTFCA_0_2_LOAD210]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_GEP212:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_0_3_LOAD213:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP212]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT214:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT211]], <3 x float> [[DOTFCA_0_3_LOAD213]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_GEP215:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_LOAD216:%.*]] = load float, ptr [[DOTFCA_0_4_GEP215]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT217:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT214]], float [[DOTFCA_0_4_LOAD216]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_GEP218:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; SROA-CPS-NEXT: [[DOTFCA_0_5_LOAD219:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP218]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT220:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT217]], i64 [[DOTFCA_0_5_LOAD219]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_GEP221:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; SROA-CPS-NEXT: [[DOTFCA_1_0_LOAD222:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP221]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT223:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT220]], <3 x float> [[DOTFCA_1_0_LOAD222]], 1, 0 -; 
SROA-CPS-NEXT: [[DOTFCA_1_1_GEP224:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; SROA-CPS-NEXT: [[DOTFCA_1_1_LOAD225:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP224]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT226:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT223]], <3 x float> [[DOTFCA_1_1_LOAD225]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; SROA-CPS-NEXT: [[DOTFCA_1_2_LOAD228:%.*]] = load float, ptr [[DOTFCA_1_2_GEP227]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT229:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT226]], float [[DOTFCA_1_2_LOAD228]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; SROA-CPS-NEXT: [[DOTFCA_1_3_LOAD231:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP230]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT232:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT229]], i32 [[DOTFCA_1_3_LOAD231]], 1, 3 -; SROA-CPS-NEXT: [[TMP71:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 18, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT232]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define void @MyIntersectionShader( -; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[TMP1:%.*]] = add i32 [[CSPINIT]], 8 -; SROA-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[CSPINIT]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 -; SROA-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; SROA-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] -; SROA-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] 
-; SROA-CPS: callAHit.i: -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; SROA-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader.resume.0) -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP6]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32:![0-9]+]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: isEnd.i: -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; SROA-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; SROA-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP10]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; SROA-CPS-NEXT: 
[[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <3 x i32> -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] -; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP12:%.*]], label [[TMP14:%.*]] -; SROA-CPS: 12: -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x 
float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP1]], -8 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 14: -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP1]], -8 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META42]] { -; SROA-CPS-NEXT: entryresume.0: -; SROA-CPS-NEXT: [[TMP4:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[TMP3]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP9:%.*]] -; SROA-CPS: 5: -; SROA-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 
1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; SROA-CPS-NEXT: [[TMP8:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 9: -; SROA-CPS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 -; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; SROA-CPS-NEXT: [[TMP12:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define void @MyIntersectionShader2( -; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[TMP1:%.*]] = add i32 [[CSPINIT]], 8 -; SROA-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[CSPINIT]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 -; SROA-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 3 -; 
SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 1, 3 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] to <3 x float> -; SROA-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP4]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 1 -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 0 -; SROA-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[DOTFCA_0_1_1_EXTRACT]], i32 1 -; SROA-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP5]], 3 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_3_INSERT]], 0 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; SROA-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; SROA-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[DOTFCA_0_4_EXTRACT]] -; SROA-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; SROA-CPS: callAHit.i: -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; 
SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 -; SROA-CPS-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyIntersectionShader2.resume.0) -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 8, {} poison, i32 [[TMP6]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount !32, !continuation.registercount [[META32]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: isEnd.i: -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; SROA-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; SROA-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; SROA-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; SROA-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP10]], i32 1 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 -; SROA-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 -; SROA-CPS-NEXT: [[TMP11:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <3 x i32> -; SROA-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]] -; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP12:%.*]], label [[TMP14:%.*]] -; SROA-CPS: 12: -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> 
[[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[TMP13:%.*]] = add i32 [[TMP1]], -8 -; 
SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 14: -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; SROA-CPS-NEXT: 
[[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; SROA-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP1]], -8 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( -; SROA-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META37]] !lgc.cps [[META41]] !continuation [[META43]] { -; SROA-CPS-NEXT: entryresume.0: -; SROA-CPS-NEXT: [[TMP4:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP3]], 1, 3 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; SROA-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; SROA-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP5:%.*]], label [[TMP9:%.*]] -; SROA-CPS: 5: -; SROA-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; SROA-CPS-NEXT: 
[[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; SROA-CPS-NEXT: [[TMP8:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] -; SROA-CPS-NEXT: unreachable -; SROA-CPS: 9: -; SROA-CPS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; SROA-CPS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 -; SROA-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP11]], align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT16]], 0, 0, 0, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT18]], 0, 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT20]], 0, 1, 1 -; SROA-CPS-NEXT: 
[[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT22]], 0, 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT24]], 0, 1, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT26]], 0, 2 -; SROA-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT28]], 0, 3 -; SROA-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT30]], 0, 4 -; SROA-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT32]], 0, 5 -; SROA-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT34]], 1, 0 -; SROA-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 -; SROA-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 -; SROA-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 -; SROA-CPS-NEXT: [[TMP12:%.*]] = add i32 [[CSPINIT]], -8 -; SROA-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 10, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]) -; SROA-CPS-NEXT: unreachable -; -; -; SROA-CPS-LABEL: define void @MyMissShader( -; SROA-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META37]] !continuation [[META44:![0-9]+]] { -; SROA-CPS-NEXT: AllocaSpillBB: -; SROA-CPS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; SROA-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; SROA-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; SROA-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; SROA-CPS-NEXT: [[TMP2:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP2]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; SROA-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 -; SROA-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; SROA-CPS-NEXT: store i32 [[TMP4]], ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; SROA-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; SROA-CPS-NEXT: [[TMP5:%.*]] = add i32 [[CSPINIT]], 0 -; SROA-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]) -; SROA-CPS-NEXT: unreachable -; diff --git a/llvmraytracing/test/dx/paq-hit-attribute-size.ll b/llvmraytracing/test/dx/paq-hit-attribute-size.ll index 9b30438a33..6ae6569450 100644 --- a/llvmraytracing/test/dx/paq-hit-attribute-size.ll +++ b/llvmraytracing/test/dx/paq-hit-attribute-size.ll @@ -39,19 +39,16 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; CHECK-DAG: %struct.MyPayload.attr_max_1_i32s.layout_0_caller_out = type { [4 x i32] } ; CHECK-LABEL: define {{.*}} @AnyHit1DWordsMax1DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_1_i32s.layout_2_anyhit_out_accept define void @AnyHit1DWordsMax1DWords(%struct.MyPayload* %payload, %struct.Attributes1DWords* %attrs) !lgc.rt.attribute.size !49 !types !60 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit1DWordsMax2DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_2_i32s.layout_2_anyhit_out_accept define void @AnyHit1DWordsMax2DWords(%struct.MyPayload* %payload, %struct.Attributes1DWords* %attrs) !lgc.rt.attribute.size !22 !types !60 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit1DWordsMax8DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_8_i32s.layout_2_anyhit_out_accept.payload_attr_0_i32s define void @AnyHit1DWordsMax8DWords(%struct.MyPayload* %payload, %struct.Attributes1DWords* %attrs) !lgc.rt.attribute.size !27 !types !60 { ret void } @@ -59,7 +56,6 @@ define void @AnyHit1DWordsMax8DWords(%struct.MyPayload* 
%payload, %struct.Attrib ; The actual size matches the max size for this one, so the layout_2_anyhit_out_accept layout ; is not specialized, thus no payload_attr_N_i32s suffix. ; CHECK-LABEL: define {{.*}} @AnyHit2DWordsMax2DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_2_i32s.layout_2_anyhit_out_accept define void @AnyHit2DWordsMax2DWords(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !lgc.rt.attribute.size !22 !types !23 { ret void } @@ -67,37 +63,31 @@ define void @AnyHit2DWordsMax2DWords(%struct.MyPayload* %payload, %struct.Attrib ; The actual size is 2 DWords smaller than the max size. ; There are 2 unused DWords in the layout. ; CHECK-LABEL: define {{.*}} @AnyHit2DWordsMax4DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_4_i32s.layout_2_anyhit_out_accept.payload_attr_0_i32s define void @AnyHit2DWordsMax4DWords(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !lgc.rt.attribute.size !26 !types !23 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit2DWordsMax8DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_8_i32s.layout_2_anyhit_out_accept.payload_attr_0_i32s define void @AnyHit2DWordsMax8DWords(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !lgc.rt.attribute.size !27 !types !23 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit2DWordsNoLimit( -; CHECK: {{.*}}%struct.MyPayload.attr_max_8_i32s.layout_2_anyhit_out_accept.payload_attr_0_i32s define void @AnyHit2DWordsNoLimit(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !types !23 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit4DWordsMax4DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_4_i32s.layout_2_anyhit_out_accept define void @AnyHit4DWordsMax4DWords(%struct.MyPayload* %payload, %struct.Attributes4DWords* %attrs) !lgc.rt.attribute.size !26 !types !28 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit4DWordsMax8DWords( -; CHECK: {{.*}}%struct.MyPayload.attr_max_8_i32s.layout_2_anyhit_out_accept.payload_attr_2_i32s define void 
@AnyHit4DWordsMax8DWords(%struct.MyPayload* %payload, %struct.Attributes4DWords* %attrs) !lgc.rt.attribute.size !27 !types !28 { ret void } ; CHECK-LABEL: define {{.*}} @AnyHit4DWordsNoLimit( -; CHECK: {{.*}}%struct.MyPayload.attr_max_8_i32s.layout_2_anyhit_out_accept.payload_attr_2_i32s define void @AnyHit4DWordsNoLimit(%struct.MyPayload* %payload, %struct.Attributes4DWords* %attrs) !types !28 { ret void } diff --git a/llvmraytracing/test/dx/payload-caller-in-paq.ll b/llvmraytracing/test/dx/payload-caller-in-paq.ll index 3561189a03..1243000692 100644 --- a/llvmraytracing/test/dx/payload-caller-in-paq.ll +++ b/llvmraytracing/test/dx/payload-caller-in-paq.ll @@ -31,7 +31,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; Function Attrs: nounwind define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-LABEL: define void @RayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META24]] !continuation [[META28:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META27:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -41,53 +41,51 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], 
i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store float 1.000000e+00, ptr [[TMP6]], align 8, !tbaa [[TBAA29:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: store float 1.000000e+00, ptr [[TMP6]], align 8, !tbaa [[TBAA28:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP11]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount 
[[META33:![0-9]+]], !continuation.returnedRegistercount !26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP12]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount !25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP41]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load float, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP19]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP13]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA29]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA28]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[TMP25]], align 4, !tbaa [[TBAA34:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !tbaa [[TBAA33:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = sitofp i32 [[TMP26]] to float ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8, !tbaa [[TBAA36:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8, !tbaa [[TBAA35:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = fptrunc double [[TMP29]] to float ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP31]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP32]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP33]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP39]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP34]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP24]], float [[TMP27]], float [[TMP30]], float 0.000000e+00, i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void 
@llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR0]] -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META25:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META24:![0-9]+]] ; %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?gOutput@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 diff --git a/llvmraytracing/test/dx/payload-save-registers.ll b/llvmraytracing/test/dx/payload-save-registers.ll index 37628101c3..423e5ad43e 100644 --- a/llvmraytracing/test/dx/payload-save-registers.ll +++ b/llvmraytracing/test/dx/payload-save-registers.ll @@ -30,160 +30,156 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; Function Attrs: nounwind define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !types !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META26:![0-9]+]] !continuation.registercount [[META24:![0-9]+]] !continuation [[META27:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META23:![0-9]+]] !continuation [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_OUTERPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP5]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP5]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP5]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP5]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP5]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, 
i64 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP5]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP5]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP5]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @PAYLOAD, i32 16), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP5]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP5]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP5]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP5]], i64 13 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP5]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr 
addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = alloca [[STRUCT_INNERPAYLOAD:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = bitcast ptr [[TMP46]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP47]]) #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA28:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA27:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP49]], ptr [[TMP50]], align 4, !tbaa [[TBAA28]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP45]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP51]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP51]], ptr [[TMP50]], align 4, !tbaa [[TBAA27]] +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP45]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP56]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP52]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load float, ptr [[TMP55]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP56]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount !32 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP57]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = call ptr inttoptr (i64 4 to 
ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META31:![0-9]+]], !continuation.returnedRegistercount !31 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP65]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_INNERPAYLOAD]] poison, ptr [[TMP46]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load float, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP60]], ptr [[TMP59]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP97]], ptr [[TMP59]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP58]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP50]], align 4, !tbaa [[TBAA28]] -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP61]], ptr [[TMP48]], align 4, !tbaa [[TBAA28]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load float, ptr [[TMP50]], align 4, !tbaa [[TBAA27]] +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP60]], ptr [[TMP48]], align 4, !tbaa [[TBAA27]] ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP47]]) #[[ATTR0]] -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 23), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr getelementptr inbounds ([37 x i32], ptr @PAYLOAD, i32 0, i32 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr 
addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP62]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[TMP63]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[TMP63]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[TMP63]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP69]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP63]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP63]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP73]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr i32, ptr [[TMP63]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP63]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP77]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr [[TMP63]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP79]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr i32, ptr [[TMP63]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP63]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP83]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[TMP63]], 
i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP85]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr i32, ptr [[TMP63]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP87]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[TMP63]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP89]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr i32, ptr [[TMP63]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP90]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr [[TMP63]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP92]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP93]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP73]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP77]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP79]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP72]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP74]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP83]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = 
load i32, ptr [[TMP76]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP85]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP78]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP87]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP80]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP89]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP84]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP93]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load i32, ptr [[TMP88]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP100]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP90]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP101]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP95]], !continuation.registercount [[META24]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP95]], !continuation.registercount [[META23]] ; %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 %2 = alloca %struct.InnerPayload, align 4 @@ -205,106 +201,101 @@ define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !typ ; Function Attrs: nounwind define void @Callable(%struct.OuterPayload* noalias %outerPayload) #0 !types !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Callable( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META33:![0-9]+]] !continuation.registercount [[META24]] !continuation [[META34:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META33:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_OUTERPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca 
[[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_OUTERPAYLOAD]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP5]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP5]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP5]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP5]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 5), align 4 
-; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP5]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP5]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP5]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP5]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP5]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP5]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP5]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP5]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP5]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], 
ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[TMP36]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP37]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP37]], i64 1 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP37]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP37]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP37]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP37]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[TMP37]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[TMP37]], i64 7 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP53]], ptr [[TMP52]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP37]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP54]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[TMP37]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[TMP37]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP58]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP37]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP60]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[TMP37]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[TMP37]], i64 13 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[TMP37]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP53]], ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP50]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP59:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP54]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[TMP62]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 0 @@ -428,357 +419,344 @@ define void @Callable(%struct.OuterPayload* noalias %outerPayload) #0 !types !23 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP157]], 
ptr [[TMP155]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP159:%.*]] = getelementptr i32, ptr [[TMP158]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP160:%.*]] = getelementptr i32, ptr [[TMP159]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP160]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP161]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP162:%.*]] = getelementptr i32, ptr [[TMP159]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP163:%.*]] = load i32, ptr [[TMP162]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP163]], ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP164:%.*]] = getelementptr i32, ptr [[TMP159]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP165:%.*]] = load i32, ptr [[TMP164]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP165]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP166:%.*]] = getelementptr i32, ptr [[TMP159]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP167:%.*]] = load i32, ptr [[TMP166]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP167]], ptr getelementptr (i32, ptr @PAYLOAD, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP168:%.*]] = getelementptr i32, ptr [[TMP159]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP169]], ptr getelementptr (i32, ptr @PAYLOAD, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP170:%.*]] = getelementptr i32, ptr [[TMP159]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP171:%.*]] = load i32, ptr [[TMP170]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP171]], ptr getelementptr 
(i32, ptr @PAYLOAD, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP172:%.*]] = getelementptr i32, ptr [[TMP159]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP173:%.*]] = load i32, ptr [[TMP172]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP173]], ptr getelementptr (i32, ptr @PAYLOAD, i64 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP174:%.*]] = getelementptr i32, ptr [[TMP159]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP175:%.*]] = load i32, ptr [[TMP174]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP175]], ptr getelementptr (i32, ptr @PAYLOAD, i64 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP176:%.*]] = getelementptr i32, ptr [[TMP159]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP177:%.*]] = load i32, ptr [[TMP176]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP177]], ptr getelementptr (i32, ptr @PAYLOAD, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP178:%.*]] = getelementptr i32, ptr [[TMP159]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP179:%.*]] = load i32, ptr [[TMP178]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP179]], ptr getelementptr (i32, ptr @PAYLOAD, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP180:%.*]] = getelementptr i32, ptr [[TMP159]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP181]], ptr getelementptr (i32, ptr @PAYLOAD, i64 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP182:%.*]] = getelementptr i32, ptr [[TMP159]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP183:%.*]] = load i32, ptr [[TMP182]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP183]], ptr getelementptr (i32, ptr @PAYLOAD, i64 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP184:%.*]] = getelementptr i32, ptr [[TMP159]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP185:%.*]] = load i32, ptr [[TMP184]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP185]], ptr getelementptr (i32, 
ptr @PAYLOAD, i64 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP186:%.*]] = getelementptr i32, ptr [[TMP159]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP187:%.*]] = load i32, ptr [[TMP186]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP187]], ptr getelementptr (i32, ptr @PAYLOAD, i64 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP188:%.*]] = getelementptr i32, ptr [[TMP159]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP189:%.*]] = load i32, ptr [[TMP188]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP189]], ptr getelementptr (i32, ptr @PAYLOAD, i64 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP158]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP161]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP159:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP163:%.*]] = load i32, ptr [[TMP159]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP163]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP184:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP165:%.*]] = load i32, ptr [[TMP184]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP165]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP160:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP167:%.*]] = load i32, ptr [[TMP160]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP167]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP162:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP169:%.*]] = load i32, ptr [[TMP162]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP169]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP164:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP171:%.*]] = load i32, ptr [[TMP164]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP171]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP166:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP173:%.*]] = load i32, ptr [[TMP166]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP173]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP168:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP175:%.*]] = load i32, ptr [[TMP168]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP175]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP170:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP177:%.*]] = load i32, ptr [[TMP170]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP177]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP172:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP179:%.*]] = load i32, ptr [[TMP172]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP179]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP174:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP181:%.*]] = load i32, ptr [[TMP174]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP181]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP176:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP183:%.*]] = load i32, ptr [[TMP176]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP183]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP178:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP185:%.*]] = load i32, ptr [[TMP178]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP185]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP180:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP187:%.*]] = load i32, ptr [[TMP180]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP187]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP182:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP189:%.*]] = load i32, ptr [[TMP182]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP189]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP190:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP191:%.*]] = getelementptr i32, ptr [[TMP190]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP192:%.*]] = getelementptr i32, ptr [[TMP191]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP193]], ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i32 15), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP194:%.*]] = getelementptr i32, ptr [[TMP191]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP195:%.*]] = load i32, ptr [[TMP194]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP195]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP196:%.*]] = getelementptr i32, ptr [[TMP191]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP197:%.*]] = load i32, ptr [[TMP196]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP197]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP198:%.*]] = getelementptr i32, ptr [[TMP191]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP199:%.*]] = load i32, ptr [[TMP198]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP199]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP200:%.*]] = getelementptr i32, ptr [[TMP191]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP201:%.*]] = load i32, ptr [[TMP200]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP201]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP202:%.*]] = getelementptr i32, ptr [[TMP191]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP203:%.*]] = load i32, ptr [[TMP202]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP203]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP204:%.*]] = getelementptr i32, ptr [[TMP191]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP205:%.*]] = load i32, ptr [[TMP204]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP205]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 
0, i64 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP206:%.*]] = getelementptr i32, ptr [[TMP191]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP207:%.*]] = load i32, ptr [[TMP206]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP207]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP208:%.*]] = getelementptr i32, ptr [[TMP191]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP209:%.*]] = load i32, ptr [[TMP208]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP209]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP210:%.*]] = getelementptr i32, ptr [[TMP191]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP211:%.*]] = load i32, ptr [[TMP210]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP211]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP212:%.*]] = getelementptr i32, ptr [[TMP191]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP213:%.*]] = load i32, ptr [[TMP212]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP213]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP214:%.*]] = getelementptr i32, ptr [[TMP191]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP215:%.*]] = load i32, ptr [[TMP214]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP215]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP216:%.*]] = getelementptr i32, ptr [[TMP191]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP217:%.*]] = load i32, ptr [[TMP216]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP217]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], 
ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP218:%.*]] = getelementptr i32, ptr [[TMP191]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP219:%.*]] = load i32, ptr [[TMP218]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP219]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP220:%.*]] = getelementptr i32, ptr [[TMP191]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP221:%.*]] = load i32, ptr [[TMP220]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP221]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP222:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META24]], !continuation.returnedRegistercount !24 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP223:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP222]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP190]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP193]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP186:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP195:%.*]] = load i32, ptr [[TMP186]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP195]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP188:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP197:%.*]] = load i32, ptr [[TMP188]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP197]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP191:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP199:%.*]] = load i32, ptr [[TMP191]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP199]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP192:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP201:%.*]] = load i32, ptr [[TMP192]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP201]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP194:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP203:%.*]] = load i32, ptr [[TMP194]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP203]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP196:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP205:%.*]] = load i32, ptr [[TMP196]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP205]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP198:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP207:%.*]] = load i32, ptr [[TMP198]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP207]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP200:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP209:%.*]] = load i32, ptr [[TMP200]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP209]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP202:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP211:%.*]] = load i32, ptr [[TMP202]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP211]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP204:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP213:%.*]] = load i32, ptr [[TMP204]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP213]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP206:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP215:%.*]] = load i32, ptr [[TMP206]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP215]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP208:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP217:%.*]] = load i32, ptr [[TMP208]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP217]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP210:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP219:%.*]] = load i32, ptr [[TMP210]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP219]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP212:%.*]] = getelementptr inbounds i32, ptr [[TMP190]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP221:%.*]] = load i32, ptr [[TMP212]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP221]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP214:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META23]], !continuation.returnedRegistercount !23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP223:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP214]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_OUTERPAYLOAD]] poison, ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP224:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP225:%.*]] = getelementptr i32, ptr [[TMP224]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP226:%.*]] = getelementptr i32, ptr [[TMP225]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP227:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP227]], ptr [[TMP226]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP228:%.*]] = getelementptr i32, ptr [[TMP225]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP229:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP229]], ptr [[TMP228]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP230:%.*]] = getelementptr i32, ptr [[TMP225]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP231:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP231]], ptr [[TMP230]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP232:%.*]] = getelementptr i32, ptr [[TMP225]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP233:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP233]], ptr [[TMP232]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP234:%.*]] = getelementptr i32, ptr [[TMP225]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP235:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 4), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP235]], ptr [[TMP234]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP236:%.*]] = getelementptr i32, ptr [[TMP225]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP237:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP237]], ptr [[TMP236]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP238:%.*]] = getelementptr i32, ptr [[TMP225]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP239:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP239]], ptr [[TMP238]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP240:%.*]] = getelementptr i32, ptr [[TMP225]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP241:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP241]], ptr [[TMP240]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP242:%.*]] = getelementptr i32, ptr [[TMP225]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP243:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP243]], ptr [[TMP242]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP244:%.*]] = getelementptr i32, ptr [[TMP225]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP245:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP245]], ptr [[TMP244]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP246:%.*]] = getelementptr i32, ptr [[TMP225]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP247:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP247]], ptr [[TMP246]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP248:%.*]] = getelementptr i32, ptr [[TMP225]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP249:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 11), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP249]], ptr [[TMP248]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP250:%.*]] = getelementptr i32, ptr [[TMP225]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP251:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP251]], ptr [[TMP250]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP252:%.*]] = getelementptr i32, ptr [[TMP225]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP253:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP253]], ptr [[TMP252]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP254:%.*]] = getelementptr i32, ptr [[TMP225]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP255:%.*]] = load i32, ptr getelementptr (i32, ptr @PAYLOAD, i64 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP255]], ptr [[TMP254]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP227]], ptr [[TMP224]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP218:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP229:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP229]], ptr [[TMP218]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP220:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP231]], ptr [[TMP220]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP222:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP233:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP233]], ptr [[TMP222]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP225:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP235:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP235]], ptr [[TMP225]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP226:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP237]], ptr [[TMP226]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP228:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP239:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP239]], ptr [[TMP228]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP230:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP241]], ptr [[TMP230]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP232:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP243:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP243]], ptr [[TMP232]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP234:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP245:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), 
align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP245]], ptr [[TMP234]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP236:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP247]], ptr [[TMP236]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP238:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP249:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP249]], ptr [[TMP238]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP240:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP251]], ptr [[TMP240]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP242:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP253:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP253]], ptr [[TMP242]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP244:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP255:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP255]], ptr [[TMP244]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP256:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP257:%.*]] = getelementptr i32, ptr [[TMP256]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP258:%.*]] = getelementptr i32, ptr [[TMP257]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP259:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP259]], ptr [[TMP258]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP260:%.*]] = getelementptr i32, ptr [[TMP257]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP261:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP261]], ptr [[TMP260]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP262:%.*]] = getelementptr i32, ptr [[TMP257]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP263:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP263]], ptr [[TMP262]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP264:%.*]] = getelementptr i32, ptr [[TMP257]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP265:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP265]], ptr [[TMP264]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP266:%.*]] = getelementptr i32, ptr [[TMP257]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP267:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP267]], ptr [[TMP266]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP268:%.*]] = getelementptr i32, ptr [[TMP257]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP269:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP269]], ptr [[TMP268]], align 4 
-; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP270:%.*]] = getelementptr i32, ptr [[TMP257]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP271:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP271]], ptr [[TMP270]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP272:%.*]] = getelementptr i32, ptr [[TMP257]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP273:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP273]], ptr [[TMP272]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP274:%.*]] = getelementptr i32, ptr [[TMP257]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP275:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP275]], ptr [[TMP274]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP276:%.*]] = getelementptr i32, ptr [[TMP257]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP277:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP277]], ptr [[TMP276]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP278:%.*]] = getelementptr i32, ptr [[TMP257]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP279:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP279]], ptr [[TMP278]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP280:%.*]] = getelementptr i32, ptr [[TMP257]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP281:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP281]], 
ptr [[TMP280]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP282:%.*]] = getelementptr i32, ptr [[TMP257]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP283:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP283]], ptr [[TMP282]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP284:%.*]] = getelementptr i32, ptr [[TMP257]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP285:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP285]], ptr [[TMP284]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP286:%.*]] = getelementptr i32, ptr [[TMP257]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP287:%.*]] = load i32, ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP287]], ptr [[TMP286]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP259:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP259]], ptr [[TMP256]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP248:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP261]], ptr [[TMP248]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP250:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP263:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP263]], ptr [[TMP250]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP252:%.*]] = getelementptr 
inbounds i32, ptr [[TMP256]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP265:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP265]], ptr [[TMP252]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP254:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP267]], ptr [[TMP254]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP257:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP269:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP269]], ptr [[TMP257]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP258:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP270:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP270]], ptr [[TMP258]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP260:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP272]], ptr [[TMP260]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP262:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP274:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP274]], ptr [[TMP262]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP264:%.*]] = getelementptr inbounds i32, 
ptr [[TMP256]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP276]], ptr [[TMP264]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP266:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP278:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP278]], ptr [[TMP266]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP268:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP280:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP280]], ptr [[TMP268]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP271:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP282:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP282]], ptr [[TMP271]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP273:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP284:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP284]], ptr [[TMP273]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP275:%.*]] = getelementptr inbounds i32, ptr [[TMP256]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP286:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP286]], ptr [[TMP275]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP223]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP288:%.*]] = load float, ptr [[TMP68]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP288]], ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP289:%.*]] = load float, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP289]], ptr [[TMP72]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP277:%.*]] = load float, ptr [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP277]], ptr [[TMP72]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP290:%.*]] = load float, ptr [[TMP74]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP290]], ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP291:%.*]] = load float, ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP291]], ptr [[TMP78]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP279:%.*]] = load float, ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP279]], ptr [[TMP78]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP292:%.*]] = load float, ptr [[TMP80]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP292]], ptr [[TMP81]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP293:%.*]] = load float, ptr [[TMP83]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP293]], ptr [[TMP84]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP281:%.*]] = load float, ptr [[TMP83]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP281]], ptr [[TMP84]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP294:%.*]] = load float, ptr [[TMP86]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP294]], ptr [[TMP87]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP295:%.*]] = load float, ptr [[TMP89]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP295]], ptr [[TMP90]], 
align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP283:%.*]] = load float, ptr [[TMP89]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP283]], ptr [[TMP90]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP296:%.*]] = load float, ptr [[TMP92]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP296]], ptr [[TMP93]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP297:%.*]] = load float, ptr [[TMP95]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP297]], ptr [[TMP96]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP285:%.*]] = load float, ptr [[TMP95]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP285]], ptr [[TMP96]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP298:%.*]] = load float, ptr [[TMP98]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP298]], ptr [[TMP99]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP299:%.*]] = load float, ptr [[TMP101]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP299]], ptr [[TMP102]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP287:%.*]] = load float, ptr [[TMP101]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP287]], ptr [[TMP102]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP300:%.*]] = load float, ptr [[TMP104]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP300]], ptr [[TMP105]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP301:%.*]] = load float, ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP301]], ptr [[TMP108]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP289:%.*]] = load float, ptr [[TMP107]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP289]], ptr [[TMP108]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP302:%.*]] = load float, ptr [[TMP110]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP302]], ptr [[TMP111]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP303:%.*]] = load float, ptr [[TMP113]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP303]], ptr [[TMP114]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP304:%.*]] = load float, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP304]], ptr [[TMP117]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP305:%.*]] = load float, ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP305]], ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP306:%.*]] = load float, ptr [[TMP122]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP306]], ptr [[TMP123]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP307:%.*]] = load float, ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP307]], ptr [[TMP126]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP308:%.*]] = load float, ptr [[TMP128]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP308]], ptr [[TMP129]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP309:%.*]] = load float, ptr [[TMP131]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP309]], ptr [[TMP132]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP310:%.*]] = load float, ptr [[TMP134]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP310]], ptr [[TMP135]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP311:%.*]] = load float, ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP311]], ptr [[TMP138]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP312:%.*]] = load float, ptr [[TMP140]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP312]], ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP313:%.*]] = load float, ptr [[TMP143]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP313]], ptr [[TMP144]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP314:%.*]] = load float, ptr [[TMP146]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP314]], ptr [[TMP147]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP315:%.*]] = load float, ptr [[TMP149]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP315]], ptr [[TMP150]], align 
4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP316:%.*]] = load float, ptr [[TMP152]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP316]], ptr [[TMP153]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP317:%.*]] = load float, ptr [[TMP155]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP317]], ptr [[TMP156]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP291:%.*]] = load float, ptr [[TMP113]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP291]], ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP306:%.*]] = load float, ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP306]], ptr [[TMP117]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP293:%.*]] = load float, ptr [[TMP119]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP293]], ptr [[TMP120]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP307:%.*]] = load float, ptr [[TMP122]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP307]], ptr [[TMP123]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP295:%.*]] = load float, ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP295]], ptr [[TMP126]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP309:%.*]] = load float, ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP309]], ptr [[TMP129]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP297:%.*]] = load float, ptr [[TMP131]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP297]], ptr [[TMP132]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP311:%.*]] = load float, ptr [[TMP134]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP311]], ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP299:%.*]] = load float, ptr [[TMP137]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP299]], ptr [[TMP138]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP313:%.*]] = load float, 
ptr [[TMP140]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP313]], ptr [[TMP141]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP301:%.*]] = load float, ptr [[TMP143]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP301]], ptr [[TMP144]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP315:%.*]] = load float, ptr [[TMP146]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP315]], ptr [[TMP147]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP303:%.*]] = load float, ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP303]], ptr [[TMP150]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP304:%.*]] = load float, ptr [[TMP152]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP304]], ptr [[TMP153]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP305:%.*]] = load float, ptr [[TMP155]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP305]], ptr [[TMP156]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP318:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP319:%.*]] = getelementptr i32, ptr [[TMP318]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP320:%.*]] = getelementptr i32, ptr [[TMP319]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP321:%.*]] = load i32, ptr [[TMP320]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP321]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP322:%.*]] = getelementptr i32, ptr [[TMP319]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP323:%.*]] = load i32, ptr [[TMP322]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP323]], ptr getelementptr (i32, ptr @PAYLOAD, i64 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP324:%.*]] = getelementptr i32, ptr [[TMP319]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP325:%.*]] = load i32, ptr [[TMP324]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP325]], ptr getelementptr (i32, ptr @PAYLOAD, i64 2), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP326:%.*]] = getelementptr i32, ptr [[TMP319]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP327:%.*]] = load i32, ptr [[TMP326]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP327]], ptr getelementptr (i32, ptr @PAYLOAD, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP328:%.*]] = getelementptr i32, ptr [[TMP319]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP329:%.*]] = load i32, ptr [[TMP328]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP329]], ptr getelementptr (i32, ptr @PAYLOAD, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP330:%.*]] = getelementptr i32, ptr [[TMP319]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP331:%.*]] = load i32, ptr [[TMP330]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP331]], ptr getelementptr (i32, ptr @PAYLOAD, i64 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP332:%.*]] = getelementptr i32, ptr [[TMP319]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP333:%.*]] = load i32, ptr [[TMP332]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP333]], ptr getelementptr (i32, ptr @PAYLOAD, i64 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP334:%.*]] = getelementptr i32, ptr [[TMP319]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP335:%.*]] = load i32, ptr [[TMP334]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP335]], ptr getelementptr (i32, ptr @PAYLOAD, i64 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP336:%.*]] = getelementptr i32, ptr [[TMP319]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP337:%.*]] = load i32, ptr [[TMP336]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP337]], ptr getelementptr (i32, ptr @PAYLOAD, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP338:%.*]] = getelementptr i32, ptr [[TMP319]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP339:%.*]] = load i32, ptr [[TMP338]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP339]], ptr getelementptr (i32, ptr @PAYLOAD, i64 9), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP340:%.*]] = getelementptr i32, ptr [[TMP319]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP341:%.*]] = load i32, ptr [[TMP340]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP341]], ptr getelementptr (i32, ptr @PAYLOAD, i64 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP342:%.*]] = getelementptr i32, ptr [[TMP319]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP343:%.*]] = load i32, ptr [[TMP342]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP343]], ptr getelementptr (i32, ptr @PAYLOAD, i64 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP344:%.*]] = getelementptr i32, ptr [[TMP319]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP345:%.*]] = load i32, ptr [[TMP344]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP345]], ptr getelementptr (i32, ptr @PAYLOAD, i64 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP346:%.*]] = getelementptr i32, ptr [[TMP319]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP347:%.*]] = load i32, ptr [[TMP346]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP347]], ptr getelementptr (i32, ptr @PAYLOAD, i64 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP348:%.*]] = getelementptr i32, ptr [[TMP319]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP349:%.*]] = load i32, ptr [[TMP348]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP349]], ptr getelementptr (i32, ptr @PAYLOAD, i64 14), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP321:%.*]] = load i32, ptr [[TMP318]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP321]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP308:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP323:%.*]] = load i32, ptr [[TMP308]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP323]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP310:%.*]] = getelementptr 
inbounds i32, ptr [[TMP318]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP325:%.*]] = load i32, ptr [[TMP310]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP325]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP312:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP327:%.*]] = load i32, ptr [[TMP312]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP327]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP314:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP329:%.*]] = load i32, ptr [[TMP314]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP329]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP316:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP331:%.*]] = load i32, ptr [[TMP316]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP331]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP319:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP333:%.*]] = load i32, ptr [[TMP319]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP333]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP320:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP335:%.*]] = load i32, ptr [[TMP320]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP335]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP322:%.*]] = getelementptr inbounds i32, ptr 
[[TMP318]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP337:%.*]] = load i32, ptr [[TMP322]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP337]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP324:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP339:%.*]] = load i32, ptr [[TMP324]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP339]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP326:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP341:%.*]] = load i32, ptr [[TMP326]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP341]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP328:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP343:%.*]] = load i32, ptr [[TMP328]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP343]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP330:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP345:%.*]] = load i32, ptr [[TMP330]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP345]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP332:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP347:%.*]] = load i32, ptr [[TMP332]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP347]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP334:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], 
i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP349:%.*]] = load i32, ptr [[TMP334]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP349]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP350:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP351:%.*]] = getelementptr i32, ptr [[TMP350]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP352:%.*]] = getelementptr i32, ptr [[TMP351]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP353:%.*]] = load i32, ptr [[TMP352]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP353]], ptr getelementptr inbounds ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP354:%.*]] = getelementptr i32, ptr [[TMP351]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP355:%.*]] = load i32, ptr [[TMP354]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP355]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP356:%.*]] = getelementptr i32, ptr [[TMP351]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP357:%.*]] = load i32, ptr [[TMP356]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP357]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP358:%.*]] = getelementptr i32, ptr [[TMP351]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP359:%.*]] = load i32, ptr [[TMP358]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP359]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP360:%.*]] = getelementptr i32, ptr [[TMP351]], i64 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP361:%.*]] = load i32, ptr [[TMP360]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP361]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP362:%.*]] = getelementptr i32, ptr [[TMP351]], i64 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP363:%.*]] = load i32, ptr [[TMP362]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP363]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP364:%.*]] = getelementptr i32, ptr [[TMP351]], i64 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP365:%.*]] = load i32, ptr [[TMP364]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP365]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP366:%.*]] = getelementptr i32, ptr [[TMP351]], i64 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP367:%.*]] = load i32, ptr [[TMP366]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP367]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP368:%.*]] = getelementptr i32, ptr [[TMP351]], i64 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP369:%.*]] = load i32, ptr [[TMP368]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP369]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP370:%.*]] = getelementptr i32, ptr [[TMP351]], i64 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP371:%.*]] = load i32, ptr [[TMP370]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP371]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP372:%.*]] = getelementptr i32, ptr [[TMP351]], i64 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP373:%.*]] = load i32, ptr 
[[TMP372]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP373]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP374:%.*]] = getelementptr i32, ptr [[TMP351]], i64 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP375:%.*]] = load i32, ptr [[TMP374]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP375]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP376:%.*]] = getelementptr i32, ptr [[TMP351]], i64 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP377:%.*]] = load i32, ptr [[TMP376]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP377]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP378:%.*]] = getelementptr i32, ptr [[TMP351]], i64 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP379:%.*]] = load i32, ptr [[TMP378]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP379]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP380:%.*]] = getelementptr i32, ptr [[TMP351]], i64 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP381:%.*]] = load i32, ptr [[TMP380]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP381]], ptr getelementptr ([[STRUCT_OUTERPAYLOAD_LAYOUT_CALLSHADER]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP353:%.*]] = load i32, ptr [[TMP350]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP353]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP338:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP355:%.*]] = load i32, ptr [[TMP338]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP355]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP340:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP357:%.*]] = load i32, ptr [[TMP340]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP357]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP342:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP359:%.*]] = load i32, ptr [[TMP342]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP359]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP344:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP361:%.*]] = load i32, ptr [[TMP344]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP361]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP346:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP363:%.*]] = load i32, ptr [[TMP346]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP363]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP348:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP365:%.*]] = load i32, ptr [[TMP348]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP365]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP351:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP367:%.*]] = load i32, ptr [[TMP351]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP367]], 
ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP352:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP369:%.*]] = load i32, ptr [[TMP352]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP369]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP354:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP371:%.*]] = load i32, ptr [[TMP354]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP371]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP356:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP373:%.*]] = load i32, ptr [[TMP356]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP373]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP358:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP375:%.*]] = load i32, ptr [[TMP358]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP375]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP360:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP377:%.*]] = load i32, ptr [[TMP360]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP377]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP362:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP379:%.*]] = load i32, ptr [[TMP362]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP379]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP364:%.*]] = getelementptr inbounds i32, ptr [[TMP350]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP381:%.*]] = load i32, ptr [[TMP364]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP381]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP382:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP382]], !continuation.registercount [[META24]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP382]], !continuation.registercount [[META23]] ; %1 = alloca %struct.OuterPayload, align 8 %2 = getelementptr inbounds %struct.OuterPayload, %struct.OuterPayload* %1, i32 0, i32 0, i32 0 diff --git a/llvmraytracing/test/dx/payload.ll b/llvmraytracing/test/dx/payload.ll index 9d38132eb3..4bd2b8c855 100644 --- a/llvmraytracing/test/dx/payload.ll +++ b/llvmraytracing/test/dx/payload.ll @@ -196,143 +196,196 @@ attributes #3 = { nounwind } ; ; ; CLEANUP-LABEL: define void @main( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META9]] !continuation [[META21:![0-9]+]] !continuation.stacksize [[META22:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr 
addrspace(32) [[TMP1]], i32 0, i32 0 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 108) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; CLEANUP-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP4]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP6:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP5]]) +; CLEANUP-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; CLEANUP-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x 
i32> [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; CLEANUP-NEXT: [[TMP7:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 undef, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 undef, ptr 
getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), 
align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; 
CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 undef, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 108) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: [[TMP6:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: store i32 undef, 
ptr addrspace(32) [[TMP7]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP8]], align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 2 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP9]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 3 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP10]], align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 4 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP11]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 5 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP12]], align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 6 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP13]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 7 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP14]], align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 8 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP15]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 9 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP16]], align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 10 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP17]], align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 11 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP18]], align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 12 +; CLEANUP-NEXT: store i32 undef, ptr 
addrspace(32) [[TMP19]], align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 13 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP20]], align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 14 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP21]], align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 15 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP22]], align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 16 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP23]], align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 17 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP24]], align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 18 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP25]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 19 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP26]], align 4 +; CLEANUP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 20 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP27]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 21 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 22 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP29]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 23 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 24 +; CLEANUP-NEXT: store i32 undef, 
ptr addrspace(32) [[TMP31]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 25 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 26 +; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP33]], align 4 +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @main.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount !17 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9]] !continuation.registercount [[META18]] !continuation [[META21]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 108) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; 
CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: 
[[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 108) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = 
getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(32) [[TMP26]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 2 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 3 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 
4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 5 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 6 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 7 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 8 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 9 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 10 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 11 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 12 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 13 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 14 +; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 15 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) 
[[TMP54]], align 4 +; CLEANUP-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 16 +; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 17 +; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 18 +; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 19 +; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 20 +; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 21 +; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 22 +; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 23 +; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 24 +; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 25 +; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 26 +; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; CLEANUP-NEXT: [[TMP80:%.*]] = load ptr 
addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; CLEANUP-NEXT: call void @lgc.cps.free(i32 108) ; CLEANUP-NEXT: ret void ; CLEANUP: entryresume.0.split: ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @AnyHit( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META24:![0-9]+]] !continuation.state [[META9]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META23:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 @@ -366,132 +419,184 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; CLEANUP-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; CLEANUP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 
0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; 
CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: 
[[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP41:%.*]] = 
load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; CLEANUP-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP53]], i32 0, i32 1 +; CLEANUP-NEXT: [[TMP3:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; 
CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 1 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 2 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], 
i32 3 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 4 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 5 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 6 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 7 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 8 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 9 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 10 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 11 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 12 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 13 +; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 14 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) 
[[TMP54]], align 4 +; CLEANUP-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 15 +; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 16 +; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 17 +; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 18 +; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 19 +; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 20 +; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 21 +; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 22 +; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 23 +; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 24 +; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 25 +; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; CLEANUP-NEXT: [[TMP78:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 26 +; CLEANUP-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(32) [[TMP78]], align 4 +; CLEANUP-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP82]], i32 0, i32 1 ; CLEANUP-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP83:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP84:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 
27), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 
48), align 4 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 [[TMP52]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[TMP85:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; 
CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: store i32 [[TMP23]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: store i32 [[TMP80]], ptr addrspace(32) [[TMP85]], align 4 +; CLEANUP-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 1 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP110]], align 4 +; CLEANUP-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 2 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP111]], align 4 +; CLEANUP-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 3 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP86]], align 4 +; CLEANUP-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 4 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP87]], align 4 +; CLEANUP-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 5 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP88]], align 4 +; CLEANUP-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 6 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 7 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 8 +; CLEANUP-NEXT: 
store i32 [[TMP43]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 9 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 10 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 11 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 12 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 13 +; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP96]], align 4 +; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 14 +; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 15 +; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 16 +; CLEANUP-NEXT: store i32 [[TMP59]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 17 +; CLEANUP-NEXT: store i32 [[TMP61]], ptr addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 18 +; CLEANUP-NEXT: store i32 [[TMP63]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 19 +; CLEANUP-NEXT: store i32 [[TMP65]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: [[TMP103:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 20 +; CLEANUP-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 21 +; CLEANUP-NEXT: store i32 [[TMP69]], ptr addrspace(32) [[TMP104]], align 4 +; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 22 +; CLEANUP-NEXT: store i32 [[TMP71]], ptr addrspace(32) [[TMP105]], align 4 +; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 23 +; CLEANUP-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 24 +; CLEANUP-NEXT: store i32 [[TMP75]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 25 +; CLEANUP-NEXT: store i32 [[TMP77]], ptr addrspace(32) [[TMP108]], align 4 +; CLEANUP-NEXT: [[TMP109:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 26 +; CLEANUP-NEXT: store i32 [[TMP79]], ptr addrspace(32) [[TMP109]], align 4 ; CLEANUP-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP56:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP56]] to float -; CLEANUP-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP57]], i32 0 +; CLEANUP-NEXT: [[TMP114:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP115:%.*]] = bitcast i32 [[TMP114]] to float +; CLEANUP-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP115]], i32 0 ; CLEANUP-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP58:%.*]] = bitcast 
float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[TMP59:%.*]] = bitcast i32 [[TMP58]] to float -; CLEANUP-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP59]], i32 1 +; CLEANUP-NEXT: [[TMP116:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP117:%.*]] = bitcast i32 [[TMP116]] to float +; CLEANUP-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP117]], i32 1 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; CLEANUP-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP60]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) +; CLEANUP-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP118]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 @@ -522,262 +627,367 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; CLEANUP-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; CLEANUP-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = 
insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @ClosestHit( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] !continuation.stacksize [[META27:![0-9]+]] !continuation.state [[META28:![0-9]+]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] !continuation.state [[META27:![0-9]+]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 120) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: 
[[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CLEANUP-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 
0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 -; CLEANUP-NEXT: store i32 [[TMP52]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(32) [[TMP26]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 2 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
i32, ptr addrspace(32) [[TMP1]], i32 3 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 5 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 6 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 7 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 8 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 9 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 10 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 11 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 12 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 13 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 14 +; CLEANUP-NEXT: [[TMP53:%.*]] 
= load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 15 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 +; CLEANUP-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 16 +; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 17 +; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 18 +; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 19 +; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 20 +; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 21 +; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 22 +; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 23 +; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 24 +; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 25 +; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; 
CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 26 +; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; CLEANUP-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[TMP80]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP81:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP82:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[TMP55:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[TMP56:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; CLEANUP-NEXT: [[TMP57:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP55]]) -; CLEANUP-NEXT: [[TMP58:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP57]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; 
CLEANUP-NEXT: [[TMP59:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP58]]) +; CLEANUP-NEXT: [[TMP83:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[TMP84:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-NEXT: [[TMP85:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP83]]) +; CLEANUP-NEXT: [[TMP86:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP85]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP87:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP86]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; CLEANUP-NEXT: [[TMP60:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP60]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: 
store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr 
@PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: store i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 
[[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 -; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 120) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @ClosestHit.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; CLEANUP-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: store i32 [[TMP88]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP89:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: store i32 [[TMP78]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 1 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr addrspace(32) [[TMP114]], align 4 +; CLEANUP-NEXT: [[TMP115:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 2 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP115]], align 4 +; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 3 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 5 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 6 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 7 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 8 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 9 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) [[TMP96]], align 4 +; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 10 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 11 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 12 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 13 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr 
addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 14 +; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 15 +; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 16 +; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 17 +; CLEANUP-NEXT: store i32 [[TMP59]], ptr addrspace(32) [[TMP104]], align 4 +; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 18 +; CLEANUP-NEXT: store i32 [[TMP61]], ptr addrspace(32) [[TMP105]], align 4 +; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 19 +; CLEANUP-NEXT: store i32 [[TMP63]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 20 +; CLEANUP-NEXT: store i32 [[TMP65]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 21 +; CLEANUP-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP108]], align 4 +; CLEANUP-NEXT: [[TMP109:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 22 +; CLEANUP-NEXT: store i32 [[TMP69]], ptr addrspace(32) [[TMP109]], align 4 +; CLEANUP-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 23 +; CLEANUP-NEXT: store i32 [[TMP71]], ptr addrspace(32) [[TMP110]], align 4 +; CLEANUP-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 24 +; CLEANUP-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP111]], align 4 +; CLEANUP-NEXT: [[TMP112:%.*]] = getelementptr 
inbounds i32, ptr addrspace(32) [[TMP89]], i32 25 +; CLEANUP-NEXT: store i32 [[TMP75]], ptr addrspace(32) [[TMP112]], align 4 +; CLEANUP-NEXT: [[TMP113:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 26 +; CLEANUP-NEXT: store i32 [[TMP77]], ptr addrspace(32) [[TMP113]], align 4 +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 4, i64 ptrtoint (ptr @ClosestHit.resume.0 to i64), [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @ClosestHit.resume.0( -; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META25]] !continuation.registercount [[META18]] !continuation [[META26]] { +; CLEANUP-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META17]] !continuation [[META25]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: call void @lgc.cps.free(i32 120) -; CLEANUP-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 0) -; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[TMP1]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: 
[[TMP18:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, 
i32 0, i64 34), align 4 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 120) +; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds 
[[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(32) [[TMP26]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 2 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 3 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 4 +; CLEANUP-NEXT: 
[[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 5 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 6 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 7 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 8 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 9 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 10 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 11 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 12 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 13 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 14 +; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 15 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 +; 
CLEANUP-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 16 +; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 17 +; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 18 +; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 19 +; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 20 +; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 21 +; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 22 +; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 23 +; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 24 +; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 25 +; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 26 +; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; CLEANUP-NEXT: [[TMP80:%.*]] = load ptr addrspace(32), ptr 
addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 2 +; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 -; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[TMP1]], i32 0, i32 1 +; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr @PAYLOAD, align 4 -; CLEANUP-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; CLEANUP-NEXT: store i32 [[TMP2]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 
0, i32 0, i64 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 30), 
align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 31), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 32), align 4 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 33), align 4 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 34), align 4 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 35), align 4 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 36), align 4 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 37), align 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 38), align 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 39), align 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 40), align 4 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 41), align 4 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 42), align 4 -; 
CLEANUP-NEXT: store i32 [[TMP38]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 43), align 4 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 44), align 4 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 45), align 4 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 46), align 4 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 47), align 4 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 48), align 4 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 49), align 4 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 50), align 4 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 51), align 4 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 52), align 4 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 53), align 4 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 54), align 4 -; CLEANUP-NEXT: store 
i32 [[TMP50]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 55), align 4 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 56), align 4 +; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[TMP81:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 +; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 +; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 +; CLEANUP-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @PAYLOAD, i32 18), align 4 +; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 +; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 +; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 +; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 +; CLEANUP-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 +; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: store i32 [[TMP78]], ptr addrspace(32) [[TMP81]], align 4 +; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 1 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 2 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 3 +; 
CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP82]], align 4 +; CLEANUP-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP83]], align 4 +; CLEANUP-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 5 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP84]], align 4 +; CLEANUP-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 6 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP85]], align 4 +; CLEANUP-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 7 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP86]], align 4 +; CLEANUP-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 8 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP87]], align 4 +; CLEANUP-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 9 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) [[TMP88]], align 4 +; CLEANUP-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 10 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 11 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 12 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 13 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 14 +; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: [[TMP94:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 15 +; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 16 +; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 17 +; CLEANUP-NEXT: store i32 [[TMP59]], ptr addrspace(32) [[TMP96]], align 4 +; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 18 +; CLEANUP-NEXT: store i32 [[TMP61]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 19 +; CLEANUP-NEXT: store i32 [[TMP63]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 20 +; CLEANUP-NEXT: store i32 [[TMP65]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 21 +; CLEANUP-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 22 +; CLEANUP-NEXT: store i32 [[TMP69]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 23 +; CLEANUP-NEXT: store i32 [[TMP71]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 24 +; CLEANUP-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 25 +; CLEANUP-NEXT: store i32 [[TMP75]], ptr addrspace(32) [[TMP104]], align 4 +; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 26 +; CLEANUP-NEXT: store i32 
[[TMP77]], ptr addrspace(32) [[TMP105]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; CLEANUP-NEXT: call void @lgc.cps.free(i32 120) +; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; @@ -799,211 +1009,155 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; POST-PROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POST-PROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 108 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POST-PROCESS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POST-PROCESS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; POST-PROCESS-NEXT: [[TMP6:%.*]] = 
call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; POST-PROCESS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; POST-PROCESS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) ; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: 
store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to 
ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -120 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 120 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP11]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP12]], align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], -120 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 124 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = inttoptr 
i32 [[TMP15]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP17]], align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -120 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 128 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), 
align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP10]], align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP8]], 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP11]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP12]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr 
addrspace(21) [[TMP13]], align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP8]], 8 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP16]], align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP8]], 12 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP18]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP19]], align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP8]], 16 ; POST-PROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP22]], align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], -120 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 132 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP25]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP26]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP27]], align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 136 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP32]], align 4 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = add 
i32 [[TMP33]], -120 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], 140 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = add i32 [[TMP8]], 20 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP23]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP24]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP25]], align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP8]], 24 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP28]], align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP8]], 28 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP29]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP30]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP31]], align 4 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = add i32 [[TMP8]], 32 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = inttoptr i32 [[TMP32]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP33]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP34]], align 4 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP8]], 36 ; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP35]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP36]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP37]], align 4 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 144 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 
-; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP42]], align 4 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], -120 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], 148 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP46]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP47]], align 4 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], -120 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP49]], 152 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP8]], 40 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP40]], align 4 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP8]], 44 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = inttoptr i32 [[TMP41]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP42]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP43]], align 4 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = add i32 [[TMP8]], 48 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = inttoptr i32 [[TMP44]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP45]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP46]], align 4 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP8]], 52 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = inttoptr i32 [[TMP47]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP48]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP49]], align 4 +; POST-PROCESS-NEXT: 
[[TMP50:%.*]] = add i32 [[TMP8]], 56 ; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP52]], align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 156 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP57]], align 4 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 160 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP62]], align 4 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], -120 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], 164 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP8]], 60 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = inttoptr i32 [[TMP53]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP54]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP55]], align 4 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = add i32 [[TMP8]], 64 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = inttoptr i32 [[TMP56]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP57]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr 
addrspace(21) [[TMP58]], align 4 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP8]], 68 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = inttoptr i32 [[TMP59]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP60]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP61]], align 4 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP8]], 72 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP64]], align 4 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP8]], 76 ; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP66]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP67]], align 4 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 168 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP72]], align 4 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP73]], -120 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], 172 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP75]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP76]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP77]], align 4 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = add 
i32 [[TMP78]], -120 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 176 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = add i32 [[TMP8]], 80 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = inttoptr i32 [[TMP68]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP69]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP70]], align 4 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = add i32 [[TMP8]], 84 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = inttoptr i32 [[TMP71]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP72]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP73]], align 4 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP8]], 88 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP76]], align 4 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP8]], 92 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = inttoptr i32 [[TMP77]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP78]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP79]], align 4 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = add i32 [[TMP8]], 96 ; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP81]], i32 0 ; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP82]], align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 180 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 
-; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP87]], align 4 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 184 -; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP92]], align 4 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], -120 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], 188 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = inttoptr i32 [[TMP95]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP96]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP97]], align 4 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 192 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP101]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP102]], align 4 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], -120 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], 196 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP106]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP107]], align 4 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(20) 
@REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], -120 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP109]], 200 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP112]], align 4 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 204 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP117]], align 4 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 208 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP122]], align 4 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], -120 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], 212 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP126]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP127]], align 4 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = add 
i32 [[TMP129]], 216 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP132]], align 4 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = add i32 [[TMP133]], -120 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], 220 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP136]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP137]], align 4 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], -120 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = add i32 [[TMP139]], 224 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP141]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP142]], align 4 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 108 -; POST-PROCESS-NEXT: store i32 [[TMP144]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP145]], i64 [[TMP146]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount !17 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP8]], 100 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = inttoptr i32 [[TMP83]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP84]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP85]], align 4 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP8]], 104 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP88]], align 4 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP89]], i64 [[TMP90]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount !17 ; POST-PROCESS-NEXT: unreachable ; ; @@ -1014,196 +1168,143 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -108 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 
11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; 
POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; 
POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 ; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 124 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP38]], align 4 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP36:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 ; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 ; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 132 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP50]], align 4 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], -120 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], 136 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 +; 
POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 ; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 ; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 140 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], -120 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], 144 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 ; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 ; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr 
addrspace(21) [[TMP68]], align 4 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 148 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], -120 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], 152 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 ; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 ; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 156 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP86:%.*]] = getelementptr 
i8, ptr addrspace(21) [[TMP85]], i32 0 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP86]], align 4 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 160 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP3]], 60 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 ; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 164 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP98]], align 4 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 168 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = 
add i32 [[TMP3]], 68 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP99]], i32 0 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 ; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 ; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 172 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP108]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP110]], align 4 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP112]], -120 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], 176 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 +; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 84 +; 
POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP112]], align 4 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 ; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 ; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 180 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP122]], align 4 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], -120 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], 184 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP120]], align 4 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP123]], i32 0 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP124]], align 4 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP3]], 100 ; 
POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 ; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 188 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = inttoptr i32 [[TMP132]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP133]], i32 0 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(21) [[TMP134]], align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], -120 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], 192 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(21) [[TMP140]], align 4 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP142]], -120 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 196 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(21) [[TMP146]], align 4 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 200 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(21) [[TMP152]], align 4 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], -120 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], 204 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(21) [[TMP158]], align 4 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 208 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(21) [[TMP164]], align 4 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 212 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(21) [[TMP170]], align 4 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP172]], -120 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], 216 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 
0 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(21) [[TMP176]], align 4 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 220 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 -; POST-PROCESS-NEXT: [[TMP183:%.*]] = load i32, ptr addrspace(21) [[TMP182]], align 4 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], -120 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], 224 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(21) [[TMP188]], align 4 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -108 +; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: ret void ; POST-PROCESS: entryresume.0.split: ; POST-PROCESS-NEXT: unreachable @@ -1246,400 +1347,292 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: 
[[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: 
[[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, 
i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -120 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 120 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP28]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP29]], i32 0 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(21) [[TMP30]], align 4 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], -120 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], 124 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 ; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 ; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 128 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP42]], align 4 -; POST-PROCESS-NEXT: [[TMP44:%.*]] 
= load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], -120 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], 132 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 ; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 ; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = inttoptr i32 [[TMP52]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP53]], i32 0 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP54]], align 4 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], -120 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP57]], 140 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP51]], i32 0 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 ; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 ; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = add i32 [[TMP62]], -120 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 144 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP64]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP65]], i32 0 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(21) [[TMP66]], align 4 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 148 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 +; POST-PROCESS-NEXT: 
[[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 ; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 ; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], -120 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 152 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = inttoptr i32 [[TMP76]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP77]], i32 0 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP78]], align 4 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 156 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 ; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 ; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 
4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], -120 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = add i32 [[TMP87]], 160 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = inttoptr i32 [[TMP88]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP89]], i32 0 -; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(21) [[TMP90]], align 4 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = add i32 [[TMP92]], -120 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], 164 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP3]], 60 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP3]], 68 ; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 ; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 168 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP101]], i32 0 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP102]], align 4 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], -120 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], 172 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP99]], i32 0 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 ; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 ; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 176 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = inttoptr i32 [[TMP112]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP113]], i32 0 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(21) [[TMP114]], align 4 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], -120 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP117]], 180 +; POST-PROCESS-NEXT: 
[[TMP110:%.*]] = add i32 [[TMP3]], 84 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP112]], align 4 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 ; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 ; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP120]], align 4 -; POST-PROCESS-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = add i32 [[TMP122]], -120 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], 184 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = inttoptr i32 [[TMP124]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP125]], i32 0 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = load i32, ptr addrspace(21) [[TMP126]], align 4 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP129]], 188 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP123]], i32 0 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP124]], align 4 +; POST-PROCESS-NEXT: [[TMP126:%.*]] 
= add i32 [[TMP3]], 100 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 ; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 ; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], -120 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], 192 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = inttoptr i32 [[TMP136]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP137]], i32 0 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(21) [[TMP138]], align 4 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 196 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = inttoptr i32 [[TMP142]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP143]], i32 0 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(21) [[TMP144]], align 4 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], -120 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = add i32 [[TMP147]], 200 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = inttoptr i32 [[TMP148]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP149]], i32 0 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = 
load i32, ptr addrspace(21) [[TMP150]], align 4 -; POST-PROCESS-NEXT: [[TMP152:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = add i32 [[TMP152]], -120 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = add i32 [[TMP153]], 204 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = inttoptr i32 [[TMP154]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP155]], i32 0 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(21) [[TMP156]], align 4 -; POST-PROCESS-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = add i32 [[TMP158]], -120 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = add i32 [[TMP159]], 208 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP161]], i32 0 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = load i32, ptr addrspace(21) [[TMP162]], align 4 -; POST-PROCESS-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = add i32 [[TMP164]], -120 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = add i32 [[TMP165]], 212 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = inttoptr i32 [[TMP166]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP167]], i32 0 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = load i32, ptr addrspace(21) [[TMP168]], align 4 -; POST-PROCESS-NEXT: [[TMP170:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], -120 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = add i32 [[TMP171]], 216 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP173]], i32 0 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = load i32, ptr addrspace(21) [[TMP174]], align 4 -; POST-PROCESS-NEXT: [[TMP176:%.*]] = 
load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = add i32 [[TMP176]], -120 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = add i32 [[TMP177]], 220 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP179]], i32 0 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = load i32, ptr addrspace(21) [[TMP180]], align 4 -; POST-PROCESS-NEXT: [[TMP182:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], -120 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], 224 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP185]], i32 0 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = load i32, ptr addrspace(21) [[TMP186]], align 4 -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP188]], i32 0, i32 1 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP134]], i32 0, i32 1 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: 
[[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POST-PROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to 
ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS 
to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], -120 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP192]], 120 -; POST-PROCESS-NEXT: [[TMP194:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP194]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP31]], ptr 
addrspace(21) [[TMP195]], align 4 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], -120 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], 124 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP200]], align 4 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 128 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = 
inttoptr i32 [[TMP137]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP138]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(21) [[TMP139]], align 4 +; POST-PROCESS-NEXT: [[TMP140:%.*]] = add i32 [[TMP137]], 4 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP141]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP142]], align 4 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP137]], 8 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = inttoptr i32 [[TMP143]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP144]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP145]], align 4 +; POST-PROCESS-NEXT: [[TMP146:%.*]] = add i32 [[TMP137]], 12 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = inttoptr i32 [[TMP146]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP147]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(21) [[TMP148]], align 4 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP137]], 16 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = inttoptr i32 [[TMP149]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP150]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP151]], align 4 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = add i32 [[TMP137]], 20 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = inttoptr i32 [[TMP152]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP154:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP153]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP154]], align 4 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP137]], 24 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = inttoptr i32 [[TMP155]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP157:%.*]] 
= getelementptr i8, ptr addrspace(21) [[TMP156]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP157]], align 4 +; POST-PROCESS-NEXT: [[TMP158:%.*]] = add i32 [[TMP137]], 28 +; POST-PROCESS-NEXT: [[TMP159:%.*]] = inttoptr i32 [[TMP158]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP160:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP159]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP160]], align 4 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP137]], 32 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = inttoptr i32 [[TMP161]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP162]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP163]], align 4 +; POST-PROCESS-NEXT: [[TMP164:%.*]] = add i32 [[TMP137]], 36 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = inttoptr i32 [[TMP164]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP165]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP166]], align 4 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP137]], 40 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = inttoptr i32 [[TMP167]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP168]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP169]], align 4 +; POST-PROCESS-NEXT: [[TMP170:%.*]] = add i32 [[TMP137]], 44 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = inttoptr i32 [[TMP170]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP171]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP172]], align 4 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP137]], 48 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = inttoptr i32 [[TMP173]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP175:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP174]], i32 0 +; POST-PROCESS-NEXT: 
store i32 [[TMP77]], ptr addrspace(21) [[TMP175]], align 4 +; POST-PROCESS-NEXT: [[TMP176:%.*]] = add i32 [[TMP137]], 52 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = inttoptr i32 [[TMP176]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP178:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP177]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP178]], align 4 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP137]], 56 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = inttoptr i32 [[TMP179]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP181:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP180]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP181]], align 4 +; POST-PROCESS-NEXT: [[TMP182:%.*]] = add i32 [[TMP137]], 60 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = inttoptr i32 [[TMP182]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP184:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP183]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP184]], align 4 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP137]], 64 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = inttoptr i32 [[TMP185]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP187:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP186]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP187]], align 4 +; POST-PROCESS-NEXT: [[TMP188:%.*]] = add i32 [[TMP137]], 68 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = inttoptr i32 [[TMP188]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP190:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP189]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP190]], align 4 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = add i32 [[TMP137]], 72 +; POST-PROCESS-NEXT: [[TMP192:%.*]] = inttoptr i32 [[TMP191]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP193:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP192]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP101]], ptr addrspace(21) [[TMP193]], align 4 +; 
POST-PROCESS-NEXT: [[TMP194:%.*]] = add i32 [[TMP137]], 76 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = inttoptr i32 [[TMP194]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP195]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP196]], align 4 +; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP137]], 80 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = inttoptr i32 [[TMP197]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP199:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP198]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP199]], align 4 +; POST-PROCESS-NEXT: [[TMP200:%.*]] = add i32 [[TMP137]], 84 +; POST-PROCESS-NEXT: [[TMP201:%.*]] = inttoptr i32 [[TMP200]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP202:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP201]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr addrspace(21) [[TMP202]], align 4 +; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP137]], 88 ; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP203]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP43]], ptr addrspace(21) [[TMP205]], align 4 -; POST-PROCESS-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 132 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP209]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP210]], align 4 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 136 -; POST-PROCESS-NEXT: 
[[TMP214:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP214]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(21) [[TMP215]], align 4 -; POST-PROCESS-NEXT: [[TMP216:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = add i32 [[TMP216]], -120 -; POST-PROCESS-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], 140 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP218]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP219]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP220]], align 4 -; POST-PROCESS-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 144 -; POST-PROCESS-NEXT: [[TMP224:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP224]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP67]], ptr addrspace(21) [[TMP225]], align 4 -; POST-PROCESS-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 148 -; POST-PROCESS-NEXT: [[TMP229:%.*]] = inttoptr i32 [[TMP228]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP229]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP230]], align 4 -; POST-PROCESS-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 152 -; POST-PROCESS-NEXT: [[TMP234:%.*]] = inttoptr i32 [[TMP233]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP235:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP234]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP79]], ptr addrspace(21) [[TMP235]], align 4 -; POST-PROCESS-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], -120 -; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], 156 -; POST-PROCESS-NEXT: [[TMP239:%.*]] = inttoptr i32 [[TMP238]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP239]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP240]], align 4 -; POST-PROCESS-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 -; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 160 -; POST-PROCESS-NEXT: [[TMP244:%.*]] = inttoptr i32 [[TMP243]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP244]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP91]], ptr addrspace(21) [[TMP245]], align 4 -; POST-PROCESS-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 -; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 164 -; POST-PROCESS-NEXT: [[TMP249:%.*]] = inttoptr i32 [[TMP248]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP249]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP250]], align 4 -; POST-PROCESS-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 -; POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 168 -; POST-PROCESS-NEXT: [[TMP254:%.*]] = inttoptr i32 [[TMP253]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP254]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP103]], ptr 
addrspace(21) [[TMP255]], align 4 -; POST-PROCESS-NEXT: [[TMP256:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], -120 -; POST-PROCESS-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], 172 -; POST-PROCESS-NEXT: [[TMP259:%.*]] = inttoptr i32 [[TMP258]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP259]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP260]], align 4 -; POST-PROCESS-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP262:%.*]] = add i32 [[TMP261]], -120 -; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 176 -; POST-PROCESS-NEXT: [[TMP264:%.*]] = inttoptr i32 [[TMP263]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP264]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP115]], ptr addrspace(21) [[TMP265]], align 4 -; POST-PROCESS-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 -; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 180 -; POST-PROCESS-NEXT: [[TMP269:%.*]] = inttoptr i32 [[TMP268]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP269]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP270]], align 4 -; POST-PROCESS-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 -; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 184 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = inttoptr i32 [[TMP273]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP274]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP127]], ptr addrspace(21) [[TMP275]], align 4 -; POST-PROCESS-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) 
@REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], -120 -; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], 188 -; POST-PROCESS-NEXT: [[TMP279:%.*]] = inttoptr i32 [[TMP278]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP280:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP279]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP280]], align 4 -; POST-PROCESS-NEXT: [[TMP281:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP282:%.*]] = add i32 [[TMP281]], -120 -; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], 192 -; POST-PROCESS-NEXT: [[TMP284:%.*]] = inttoptr i32 [[TMP283]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP285:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP284]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP139]], ptr addrspace(21) [[TMP285]], align 4 -; POST-PROCESS-NEXT: [[TMP286:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP287:%.*]] = add i32 [[TMP286]], -120 -; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], 196 -; POST-PROCESS-NEXT: [[TMP289:%.*]] = inttoptr i32 [[TMP288]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP290:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP289]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP145]], ptr addrspace(21) [[TMP290]], align 4 -; POST-PROCESS-NEXT: [[TMP291:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP292:%.*]] = add i32 [[TMP291]], -120 -; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], 200 -; POST-PROCESS-NEXT: [[TMP294:%.*]] = inttoptr i32 [[TMP293]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP295:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP294]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP151]], ptr addrspace(21) [[TMP295]], align 4 -; POST-PROCESS-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP297:%.*]] = add i32 [[TMP296]], -120 -; POST-PROCESS-NEXT: 
[[TMP298:%.*]] = add i32 [[TMP297]], 204 -; POST-PROCESS-NEXT: [[TMP299:%.*]] = inttoptr i32 [[TMP298]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP300:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP299]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP157]], ptr addrspace(21) [[TMP300]], align 4 -; POST-PROCESS-NEXT: [[TMP301:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP302:%.*]] = add i32 [[TMP301]], -120 -; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], 208 -; POST-PROCESS-NEXT: [[TMP304:%.*]] = inttoptr i32 [[TMP303]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP304]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP163]], ptr addrspace(21) [[TMP305]], align 4 -; POST-PROCESS-NEXT: [[TMP306:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP307:%.*]] = add i32 [[TMP306]], -120 -; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], 212 -; POST-PROCESS-NEXT: [[TMP309:%.*]] = inttoptr i32 [[TMP308]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP310:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP309]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP169]], ptr addrspace(21) [[TMP310]], align 4 -; POST-PROCESS-NEXT: [[TMP311:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP312:%.*]] = add i32 [[TMP311]], -120 -; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], 216 -; POST-PROCESS-NEXT: [[TMP314:%.*]] = inttoptr i32 [[TMP313]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP315:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP314]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP175]], ptr addrspace(21) [[TMP315]], align 4 -; POST-PROCESS-NEXT: [[TMP316:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP317:%.*]] = add i32 [[TMP316]], -120 -; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], 220 -; POST-PROCESS-NEXT: [[TMP319:%.*]] = inttoptr i32 [[TMP318]] to 
ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP320:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP319]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP181]], ptr addrspace(21) [[TMP320]], align 4 -; POST-PROCESS-NEXT: [[TMP321:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP322:%.*]] = add i32 [[TMP321]], -120 -; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], 224 -; POST-PROCESS-NEXT: [[TMP324:%.*]] = inttoptr i32 [[TMP323]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP325:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP324]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP187]], ptr addrspace(21) [[TMP325]], align 4 +; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP205]], align 4 +; POST-PROCESS-NEXT: [[TMP206:%.*]] = add i32 [[TMP137]], 92 +; POST-PROCESS-NEXT: [[TMP207:%.*]] = inttoptr i32 [[TMP206]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP208:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP207]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP208]], align 4 +; POST-PROCESS-NEXT: [[TMP209:%.*]] = add i32 [[TMP137]], 96 +; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP209]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP210]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP125]], ptr addrspace(21) [[TMP211]], align 4 +; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP137]], 100 +; POST-PROCESS-NEXT: [[TMP213:%.*]] = inttoptr i32 [[TMP212]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP214:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP213]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP214]], align 4 +; POST-PROCESS-NEXT: [[TMP215:%.*]] = add i32 [[TMP137]], 104 +; POST-PROCESS-NEXT: [[TMP216:%.*]] = inttoptr i32 [[TMP215]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP217:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP216]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr 
addrspace(21) [[TMP217]], align 4 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP326:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[TMP327:%.*]] = bitcast i32 [[TMP326]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP327]], i32 0 +; POST-PROCESS-NEXT: [[TMP218:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP219:%.*]] = bitcast i32 [[TMP218]] to float +; POST-PROCESS-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP219]], i32 0 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP328:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[TMP329:%.*]] = bitcast i32 [[TMP328]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP329]], i32 1 +; POST-PROCESS-NEXT: [[TMP220:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP221:%.*]] = bitcast i32 [[TMP220]] to float +; POST-PROCESS-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP221]], i32 1 ; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-NEXT: [[TMP330:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP330]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) +; POST-PROCESS-NEXT: [[TMP222:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP222]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 @@ -1670,8 +1663,8 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; POST-PROCESS-NEXT: [[TMP331:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP331]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: [[TMP223:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP223]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] ; POST-PROCESS-NEXT: unreachable ; ; @@ -1681,410 +1674,300 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 108 +; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 120 +; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 108 ; POST-PROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; POST-PROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -120 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], 120 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP31]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(21) [[TMP33]], align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP30]], i32 0 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(21) [[TMP31]], align 4 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP6]], 4 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP34]], i32 0 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(21) [[TMP35]], align 4 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = add i32 [[TMP6]], 8 ; POST-PROCESS-NEXT: [[TMP38:%.*]] = inttoptr i32 [[TMP37]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP38]], i32 0 ; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(21) [[TMP39]], align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], -120 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], 128 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = inttoptr i32 [[TMP43]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP44]], i32 0 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(21) [[TMP45]], align 4 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], -120 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], 132 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP6]], 12 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = inttoptr i32 [[TMP41]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP42]], i32 0 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(21) [[TMP43]], align 4 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP6]], 16 +; 
POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP46]], i32 0 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(21) [[TMP47]], align 4 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP6]], 20 ; POST-PROCESS-NEXT: [[TMP50:%.*]] = inttoptr i32 [[TMP49]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP50]], i32 0 ; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(21) [[TMP51]], align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 136 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP55]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(21) [[TMP57]], align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], -120 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], 140 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP6]], 24 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = inttoptr i32 [[TMP53]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP54]], i32 0 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(21) [[TMP55]], align 4 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = add i32 [[TMP6]], 28 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = inttoptr i32 [[TMP57]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP58]], i32 0 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(21) [[TMP59]], align 4 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = add i32 [[TMP6]], 32 ; POST-PROCESS-NEXT: [[TMP62:%.*]] = inttoptr i32 [[TMP61]] 
to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP62]], i32 0 ; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 144 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = inttoptr i32 [[TMP67]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP68]], i32 0 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(21) [[TMP69]], align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], -120 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = add i32 [[TMP72]], 148 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP6]], 36 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP66]], i32 0 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(21) [[TMP67]], align 4 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP6]], 40 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = inttoptr i32 [[TMP69]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP70]], i32 0 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(21) [[TMP71]], align 4 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = add i32 [[TMP6]], 44 ; POST-PROCESS-NEXT: [[TMP74:%.*]] = inttoptr i32 [[TMP73]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP74]], i32 0 ; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(21) [[TMP75]], align 4 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], -120 -; POST-PROCESS-NEXT: 
[[TMP79:%.*]] = add i32 [[TMP78]], 152 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = inttoptr i32 [[TMP79]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP80]], i32 0 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(21) [[TMP81]], align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 156 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP6]], 48 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = inttoptr i32 [[TMP77]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP78]], i32 0 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(21) [[TMP79]], align 4 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = add i32 [[TMP6]], 52 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = inttoptr i32 [[TMP81]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP82]], i32 0 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(21) [[TMP83]], align 4 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP6]], 56 ; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 ; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(21) [[TMP87]], align 4 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], -120 -; POST-PROCESS-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], 160 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = inttoptr i32 [[TMP91]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP92]], i32 0 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(21) [[TMP93]], align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, 
align 4 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 164 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP6]], 60 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = inttoptr i32 [[TMP89]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP90]], i32 0 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(21) [[TMP91]], align 4 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = add i32 [[TMP6]], 64 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = inttoptr i32 [[TMP93]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP94]], i32 0 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(21) [[TMP95]], align 4 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = add i32 [[TMP6]], 68 ; POST-PROCESS-NEXT: [[TMP98:%.*]] = inttoptr i32 [[TMP97]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP98]], i32 0 ; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(21) [[TMP99]], align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], -120 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], 168 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = inttoptr i32 [[TMP103]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP104]], i32 0 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(21) [[TMP105]], align 4 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], -120 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], 172 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP6]], 72 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = inttoptr i32 [[TMP101]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP102]], i32 0 +; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(21) [[TMP103]], align 4 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP6]], 76 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP106]], i32 0 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(21) [[TMP107]], align 4 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP6]], 80 ; POST-PROCESS-NEXT: [[TMP110:%.*]] = inttoptr i32 [[TMP109]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP110]], i32 0 ; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(21) [[TMP111]], align 4 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 176 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(21) [[TMP117]], align 4 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], -120 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], 180 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP6]], 84 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = inttoptr i32 [[TMP113]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP114]], i32 0 +; POST-PROCESS-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(21) [[TMP115]], align 4 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = add i32 [[TMP6]], 88 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = inttoptr i32 [[TMP117]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP118]], 
i32 0 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(21) [[TMP119]], align 4 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = add i32 [[TMP6]], 92 ; POST-PROCESS-NEXT: [[TMP122:%.*]] = inttoptr i32 [[TMP121]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP122]], i32 0 ; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 184 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP127]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(21) [[TMP129]], align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], -120 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = add i32 [[TMP132]], 188 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP6]], 96 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP126]], i32 0 +; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(21) [[TMP127]], align 4 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP6]], 100 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = inttoptr i32 [[TMP129]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP130]], i32 0 +; POST-PROCESS-NEXT: [[TMP132:%.*]] = load i32, ptr addrspace(21) [[TMP131]], align 4 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = add i32 [[TMP6]], 104 ; POST-PROCESS-NEXT: [[TMP134:%.*]] = inttoptr i32 [[TMP133]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP134]], i32 0 ; 
POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(21) [[TMP135]], align 4 ; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], -120 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], 192 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = inttoptr i32 [[TMP139]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP140]], i32 0 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(21) [[TMP141]], align 4 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], -120 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = add i32 [[TMP144]], 196 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = inttoptr i32 [[TMP145]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP146]], i32 0 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(21) [[TMP147]], align 4 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], -120 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], 200 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP1]], 116 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP137]], ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 +; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; POST-PROCESS-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> 
[[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POST-PROCESS-NEXT: [[TMP143:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP143]]) +; POST-PROCESS-NEXT: [[TMP146:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP145]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-NEXT: [[TMP147:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP146]]) +; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; POST-PROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: store 
i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = inttoptr i32 [[TMP148]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP149]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP32]], ptr addrspace(21) [[TMP150]], align 4 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = add i32 [[TMP148]], 4 ; POST-PROCESS-NEXT: [[TMP152:%.*]] = inttoptr i32 [[TMP151]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP152]], i32 0 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(21) [[TMP153]], align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 204 +; POST-PROCESS-NEXT: store i32 [[TMP36]], ptr addrspace(21) [[TMP153]], align 4 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = add i32 [[TMP148]], 8 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = inttoptr i32 [[TMP154]] to ptr 
addrspace(21) +; POST-PROCESS-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP155]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(21) [[TMP156]], align 4 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = add i32 [[TMP148]], 12 ; POST-PROCESS-NEXT: [[TMP158:%.*]] = inttoptr i32 [[TMP157]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP158]], i32 0 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(21) [[TMP159]], align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], -120 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = add i32 [[TMP162]], 208 +; POST-PROCESS-NEXT: store i32 [[TMP44]], ptr addrspace(21) [[TMP159]], align 4 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = add i32 [[TMP148]], 16 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP161]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP48]], ptr addrspace(21) [[TMP162]], align 4 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = add i32 [[TMP148]], 20 ; POST-PROCESS-NEXT: [[TMP164:%.*]] = inttoptr i32 [[TMP163]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP164]], i32 0 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(21) [[TMP165]], align 4 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], -120 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = add i32 [[TMP168]], 212 +; POST-PROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(21) [[TMP165]], align 4 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = add i32 [[TMP148]], 24 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = inttoptr i32 [[TMP166]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP167]], i32 0 +; 
POST-PROCESS-NEXT: store i32 [[TMP56]], ptr addrspace(21) [[TMP168]], align 4 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = add i32 [[TMP148]], 28 ; POST-PROCESS-NEXT: [[TMP170:%.*]] = inttoptr i32 [[TMP169]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP171:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP170]], i32 0 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(21) [[TMP171]], align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], -120 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = add i32 [[TMP174]], 216 +; POST-PROCESS-NEXT: store i32 [[TMP60]], ptr addrspace(21) [[TMP171]], align 4 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = add i32 [[TMP148]], 32 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP173]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(21) [[TMP174]], align 4 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = add i32 [[TMP148]], 36 ; POST-PROCESS-NEXT: [[TMP176:%.*]] = inttoptr i32 [[TMP175]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP177:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP176]], i32 0 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(21) [[TMP177]], align 4 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], -120 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = add i32 [[TMP180]], 220 +; POST-PROCESS-NEXT: store i32 [[TMP68]], ptr addrspace(21) [[TMP177]], align 4 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = add i32 [[TMP148]], 40 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP179]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP72]], ptr addrspace(21) [[TMP180]], align 4 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = add 
i32 [[TMP148]], 44 ; POST-PROCESS-NEXT: [[TMP182:%.*]] = inttoptr i32 [[TMP181]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP183:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP182]], i32 0 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(21) [[TMP183]], align 4 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], -120 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], 224 +; POST-PROCESS-NEXT: store i32 [[TMP76]], ptr addrspace(21) [[TMP183]], align 4 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = add i32 [[TMP148]], 48 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP185]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP80]], ptr addrspace(21) [[TMP186]], align 4 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = add i32 [[TMP148]], 52 ; POST-PROCESS-NEXT: [[TMP188:%.*]] = inttoptr i32 [[TMP187]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP189:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP188]], i32 0 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = load i32, ptr addrspace(21) [[TMP189]], align 4 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP2]], 116 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP193]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP191]], ptr addrspace(21) [[TMP194]], align 4 -; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 -; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; POST-PROCESS-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = 
extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP195:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-NEXT: [[TMP197:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP197]]) -; POST-PROCESS-NEXT: [[TMP200:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP199]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-NEXT: [[TMP201:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP200]]) -; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 -; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 -; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) 
addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 
0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], -120 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], 120 -; POST-PROCESS-NEXT: 
[[TMP205:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP205]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP34]], ptr addrspace(21) [[TMP206]], align 4 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], -120 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = add i32 [[TMP208]], 124 -; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP209]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP210]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(21) [[TMP211]], align 4 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], -120 -; POST-PROCESS-NEXT: [[TMP214:%.*]] = add i32 [[TMP213]], 128 +; POST-PROCESS-NEXT: store i32 [[TMP84]], ptr addrspace(21) [[TMP189]], align 4 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = add i32 [[TMP148]], 56 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP191]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP88]], ptr addrspace(21) [[TMP192]], align 4 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP148]], 60 +; POST-PROCESS-NEXT: [[TMP194:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP194]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP92]], ptr addrspace(21) [[TMP195]], align 4 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = add i32 [[TMP148]], 64 +; POST-PROCESS-NEXT: [[TMP197:%.*]] = inttoptr i32 [[TMP196]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP198:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP197]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP96]], ptr addrspace(21) [[TMP198]], align 4 +; POST-PROCESS-NEXT: [[TMP199:%.*]] 
= add i32 [[TMP148]], 68 +; POST-PROCESS-NEXT: [[TMP200:%.*]] = inttoptr i32 [[TMP199]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP201:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP200]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP100]], ptr addrspace(21) [[TMP201]], align 4 +; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP148]], 72 +; POST-PROCESS-NEXT: [[TMP203:%.*]] = inttoptr i32 [[TMP202]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP204:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP203]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP104]], ptr addrspace(21) [[TMP204]], align 4 +; POST-PROCESS-NEXT: [[TMP205:%.*]] = add i32 [[TMP148]], 76 +; POST-PROCESS-NEXT: [[TMP206:%.*]] = inttoptr i32 [[TMP205]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP207:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP206]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP108]], ptr addrspace(21) [[TMP207]], align 4 +; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP148]], 80 +; POST-PROCESS-NEXT: [[TMP209:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP209]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP112]], ptr addrspace(21) [[TMP210]], align 4 +; POST-PROCESS-NEXT: [[TMP211:%.*]] = add i32 [[TMP148]], 84 +; POST-PROCESS-NEXT: [[TMP212:%.*]] = inttoptr i32 [[TMP211]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP213:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP212]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP116]], ptr addrspace(21) [[TMP213]], align 4 +; POST-PROCESS-NEXT: [[TMP214:%.*]] = add i32 [[TMP148]], 88 ; POST-PROCESS-NEXT: [[TMP215:%.*]] = inttoptr i32 [[TMP214]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP216:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP215]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP46]], ptr addrspace(21) [[TMP216]], align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: 
[[TMP218:%.*]] = add i32 [[TMP217]], -120 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], 132 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = inttoptr i32 [[TMP219]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP220]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(21) [[TMP221]], align 4 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 -; POST-PROCESS-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], 136 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = inttoptr i32 [[TMP224]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP226:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP225]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP58]], ptr addrspace(21) [[TMP226]], align 4 -; POST-PROCESS-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], -120 -; POST-PROCESS-NEXT: [[TMP229:%.*]] = add i32 [[TMP228]], 140 -; POST-PROCESS-NEXT: [[TMP230:%.*]] = inttoptr i32 [[TMP229]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP231:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP230]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(21) [[TMP231]], align 4 -; POST-PROCESS-NEXT: [[TMP232:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], -120 -; POST-PROCESS-NEXT: [[TMP234:%.*]] = add i32 [[TMP233]], 144 -; POST-PROCESS-NEXT: [[TMP235:%.*]] = inttoptr i32 [[TMP234]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP236:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP235]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP70]], ptr addrspace(21) [[TMP236]], align 4 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], -120 -; POST-PROCESS-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], 148 -; 
POST-PROCESS-NEXT: [[TMP240:%.*]] = inttoptr i32 [[TMP239]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP241:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP240]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP76]], ptr addrspace(21) [[TMP241]], align 4 -; POST-PROCESS-NEXT: [[TMP242:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], -120 -; POST-PROCESS-NEXT: [[TMP244:%.*]] = add i32 [[TMP243]], 152 -; POST-PROCESS-NEXT: [[TMP245:%.*]] = inttoptr i32 [[TMP244]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP246:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP245]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP82]], ptr addrspace(21) [[TMP246]], align 4 -; POST-PROCESS-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], -120 -; POST-PROCESS-NEXT: [[TMP249:%.*]] = add i32 [[TMP248]], 156 -; POST-PROCESS-NEXT: [[TMP250:%.*]] = inttoptr i32 [[TMP249]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP251:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP250]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP88]], ptr addrspace(21) [[TMP251]], align 4 -; POST-PROCESS-NEXT: [[TMP252:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], -120 -; POST-PROCESS-NEXT: [[TMP254:%.*]] = add i32 [[TMP253]], 160 -; POST-PROCESS-NEXT: [[TMP255:%.*]] = inttoptr i32 [[TMP254]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP256:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP255]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP94]], ptr addrspace(21) [[TMP256]], align 4 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], -120 -; POST-PROCESS-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], 164 -; POST-PROCESS-NEXT: [[TMP260:%.*]] = inttoptr i32 [[TMP259]] to ptr addrspace(21) -; POST-PROCESS-NEXT: 
[[TMP261:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP260]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP100]], ptr addrspace(21) [[TMP261]], align 4 -; POST-PROCESS-NEXT: [[TMP262:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], -120 -; POST-PROCESS-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], 168 -; POST-PROCESS-NEXT: [[TMP265:%.*]] = inttoptr i32 [[TMP264]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP266:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP265]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP106]], ptr addrspace(21) [[TMP266]], align 4 -; POST-PROCESS-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], -120 -; POST-PROCESS-NEXT: [[TMP269:%.*]] = add i32 [[TMP268]], 172 -; POST-PROCESS-NEXT: [[TMP270:%.*]] = inttoptr i32 [[TMP269]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP271:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP270]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP112]], ptr addrspace(21) [[TMP271]], align 4 -; POST-PROCESS-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], -120 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = add i32 [[TMP273]], 176 -; POST-PROCESS-NEXT: [[TMP275:%.*]] = inttoptr i32 [[TMP274]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP276:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP275]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP118]], ptr addrspace(21) [[TMP276]], align 4 -; POST-PROCESS-NEXT: [[TMP277:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], -120 -; POST-PROCESS-NEXT: [[TMP279:%.*]] = add i32 [[TMP278]], 180 -; POST-PROCESS-NEXT: [[TMP280:%.*]] = inttoptr i32 [[TMP279]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP281:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP280]], i32 0 -; POST-PROCESS-NEXT: store i32 
[[TMP124]], ptr addrspace(21) [[TMP281]], align 4 -; POST-PROCESS-NEXT: [[TMP282:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], -120 -; POST-PROCESS-NEXT: [[TMP284:%.*]] = add i32 [[TMP283]], 184 -; POST-PROCESS-NEXT: [[TMP285:%.*]] = inttoptr i32 [[TMP284]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP286:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP285]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP130]], ptr addrspace(21) [[TMP286]], align 4 -; POST-PROCESS-NEXT: [[TMP287:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], -120 -; POST-PROCESS-NEXT: [[TMP289:%.*]] = add i32 [[TMP288]], 188 -; POST-PROCESS-NEXT: [[TMP290:%.*]] = inttoptr i32 [[TMP289]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP291:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP290]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr addrspace(21) [[TMP291]], align 4 -; POST-PROCESS-NEXT: [[TMP292:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], -120 -; POST-PROCESS-NEXT: [[TMP294:%.*]] = add i32 [[TMP293]], 192 -; POST-PROCESS-NEXT: [[TMP295:%.*]] = inttoptr i32 [[TMP294]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP296:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP295]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP142]], ptr addrspace(21) [[TMP296]], align 4 -; POST-PROCESS-NEXT: [[TMP297:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP298:%.*]] = add i32 [[TMP297]], -120 -; POST-PROCESS-NEXT: [[TMP299:%.*]] = add i32 [[TMP298]], 196 -; POST-PROCESS-NEXT: [[TMP300:%.*]] = inttoptr i32 [[TMP299]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP301:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP300]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP148]], ptr addrspace(21) [[TMP301]], align 4 -; POST-PROCESS-NEXT: [[TMP302:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], -120 -; POST-PROCESS-NEXT: [[TMP304:%.*]] = add i32 [[TMP303]], 200 -; POST-PROCESS-NEXT: [[TMP305:%.*]] = inttoptr i32 [[TMP304]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP306:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP305]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP154]], ptr addrspace(21) [[TMP306]], align 4 -; POST-PROCESS-NEXT: [[TMP307:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], -120 -; POST-PROCESS-NEXT: [[TMP309:%.*]] = add i32 [[TMP308]], 204 -; POST-PROCESS-NEXT: [[TMP310:%.*]] = inttoptr i32 [[TMP309]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP311:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP310]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP160]], ptr addrspace(21) [[TMP311]], align 4 -; POST-PROCESS-NEXT: [[TMP312:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], -120 -; POST-PROCESS-NEXT: [[TMP314:%.*]] = add i32 [[TMP313]], 208 -; POST-PROCESS-NEXT: [[TMP315:%.*]] = inttoptr i32 [[TMP314]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP316:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP315]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP166]], ptr addrspace(21) [[TMP316]], align 4 -; POST-PROCESS-NEXT: [[TMP317:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], -120 -; POST-PROCESS-NEXT: [[TMP319:%.*]] = add i32 [[TMP318]], 212 -; POST-PROCESS-NEXT: [[TMP320:%.*]] = inttoptr i32 [[TMP319]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP321:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP320]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP172]], ptr addrspace(21) [[TMP321]], align 4 -; POST-PROCESS-NEXT: [[TMP322:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], -120 -; 
POST-PROCESS-NEXT: [[TMP324:%.*]] = add i32 [[TMP323]], 216 -; POST-PROCESS-NEXT: [[TMP325:%.*]] = inttoptr i32 [[TMP324]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP326:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP325]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP178]], ptr addrspace(21) [[TMP326]], align 4 -; POST-PROCESS-NEXT: [[TMP327:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP328:%.*]] = add i32 [[TMP327]], -120 -; POST-PROCESS-NEXT: [[TMP329:%.*]] = add i32 [[TMP328]], 220 -; POST-PROCESS-NEXT: [[TMP330:%.*]] = inttoptr i32 [[TMP329]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP331:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP330]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP184]], ptr addrspace(21) [[TMP331]], align 4 -; POST-PROCESS-NEXT: [[TMP332:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP333:%.*]] = add i32 [[TMP332]], -120 -; POST-PROCESS-NEXT: [[TMP334:%.*]] = add i32 [[TMP333]], 224 -; POST-PROCESS-NEXT: [[TMP335:%.*]] = inttoptr i32 [[TMP334]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP336:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP335]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP190]], ptr addrspace(21) [[TMP336]], align 4 -; POST-PROCESS-NEXT: [[TMP337:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP338:%.*]] = add i32 [[TMP337]], 120 -; POST-PROCESS-NEXT: store i32 [[TMP338]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP339:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP340:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP339]], i64 [[TMP340]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 +; POST-PROCESS-NEXT: store i32 [[TMP120]], ptr addrspace(21) [[TMP216]], align 4 +; POST-PROCESS-NEXT: [[TMP217:%.*]] = add i32 [[TMP148]], 92 +; POST-PROCESS-NEXT: [[TMP218:%.*]] = inttoptr i32 [[TMP217]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP219:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP218]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP124]], ptr addrspace(21) [[TMP219]], align 4 +; POST-PROCESS-NEXT: [[TMP220:%.*]] = add i32 [[TMP148]], 96 +; POST-PROCESS-NEXT: [[TMP221:%.*]] = inttoptr i32 [[TMP220]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP222:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP221]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP128]], ptr addrspace(21) [[TMP222]], align 4 +; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP148]], 100 +; POST-PROCESS-NEXT: [[TMP224:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP224]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP132]], ptr addrspace(21) [[TMP225]], align 4 +; POST-PROCESS-NEXT: [[TMP226:%.*]] = add i32 [[TMP148]], 104 +; POST-PROCESS-NEXT: [[TMP227:%.*]] = inttoptr i32 [[TMP226]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP228:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP227]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr addrspace(21) [[TMP228]], align 4 +; POST-PROCESS-NEXT: [[TMP229:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP230:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) +; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP229]], i64 [[TMP230]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount !17 ; POST-PROCESS-NEXT: unreachable ; ; @@ -2095,393 +1978,286 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -120 -; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to 
ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; 
POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; 
POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 ; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 124 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP38]], align 4 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP36:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 ; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 ; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 132 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP50]], align 4 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], -120 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], 136 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 +; 
POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 ; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 ; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 140 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], -120 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], 144 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 ; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 ; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr 
addrspace(21) [[TMP68]], align 4 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 148 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], -120 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], 152 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 ; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 ; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 156 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP86:%.*]] = getelementptr 
i8, ptr addrspace(21) [[TMP85]], i32 0 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP86]], align 4 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 160 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP3]], 60 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 ; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 164 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP98]], align 4 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 168 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = 
add i32 [[TMP3]], 68 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP99]], i32 0 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 ; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 ; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 172 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP108]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP110]], align 4 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP112]], -120 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], 176 +; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 +; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 84 +; 
POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP112]], align 4 +; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 ; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 ; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 180 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = inttoptr i32 [[TMP120]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP121]], i32 0 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP122]], align 4 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], -120 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], 184 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP120]], align 4 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP123]], i32 0 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP124]], align 4 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP3]], 100 ; 
POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 ; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 188 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = inttoptr i32 [[TMP132]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP133]], i32 0 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(21) [[TMP134]], align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], -120 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP137]], 192 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP2]], 116 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP136]], i32 0 +; POST-PROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP137]], align 4 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP2]], 108 ; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr 
addrspace(21) ; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(21) [[TMP140]], align 4 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP142]], -120 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], 196 +; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = inttoptr i32 [[TMP141]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP142]], i32 
0 +; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(21) [[TMP143]], align 4 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP141]], 4 ; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(21) [[TMP146]], align 4 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 200 +; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP146]], align 4 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = add i32 [[TMP141]], 8 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = inttoptr i32 [[TMP147]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP148]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP149]], align 4 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 12 ; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = load i32, ptr addrspace(21) [[TMP152]], align 4 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], -120 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], 204 +; POST-PROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(21) [[TMP152]], align 4 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = add i32 [[TMP141]], 16 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = inttoptr i32 [[TMP153]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP154]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP155]], align 4 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = 
add i32 [[TMP141]], 20 ; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(21) [[TMP158]], align 4 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 208 +; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP158]], align 4 +; POST-PROCESS-NEXT: [[TMP159:%.*]] = add i32 [[TMP141]], 24 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = inttoptr i32 [[TMP159]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP160]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP161]], align 4 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP141]], 28 ; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = load i32, ptr addrspace(21) [[TMP164]], align 4 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 212 +; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP164]], align 4 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = add i32 [[TMP141]], 32 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = inttoptr i32 [[TMP165]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP166]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP167]], align 4 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP141]], 36 ; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) ; 
POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = load i32, ptr addrspace(21) [[TMP170]], align 4 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP172]], -120 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], 216 +; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP170]], align 4 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = add i32 [[TMP141]], 40 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = inttoptr i32 [[TMP171]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP172]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP173]], align 4 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP141]], 44 ; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(21) [[TMP176]], align 4 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 220 +; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP176]], align 4 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = add i32 [[TMP141]], 48 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = inttoptr i32 [[TMP177]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP178]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP77]], ptr addrspace(21) [[TMP179]], align 4 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP141]], 52 ; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 -; POST-PROCESS-NEXT: 
[[TMP183:%.*]] = load i32, ptr addrspace(21) [[TMP182]], align 4 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], -120 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], 224 +; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP182]], align 4 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = add i32 [[TMP141]], 56 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = inttoptr i32 [[TMP183]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP184]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP185]], align 4 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP141]], 60 ; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(21) [[TMP188]], align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-NEXT: [[TMP190:%.*]] = add i32 [[TMP4]], 116 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP191]], i32 0 -; POST-PROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP192]], align 4 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP4]], 108 -; POST-PROCESS-NEXT: [[TMP194:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP195:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP194]], i32 0 -; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP195]], align 4 -; POST-PROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast 
(ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], -120 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], 120 +; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP188]], align 4 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = add i32 [[TMP141]], 64 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = inttoptr i32 [[TMP189]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP190]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP191]], align 4 +; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP141]], 68 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP193]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP194]], align 4 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = add i32 [[TMP141]], 72 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = inttoptr i32 [[TMP195]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP196]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP101]], ptr addrspace(21) [[TMP197]], align 4 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP141]], 76 ; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP200]], align 4 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 124 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP203]] to ptr addrspace(21) -; POST-PROCESS-NEXT: 
[[TMP205:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(21) [[TMP205]], align 4 -; POST-PROCESS-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 128 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP209]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP210]], align 4 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 132 +; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP200]], align 4 +; POST-PROCESS-NEXT: [[TMP201:%.*]] = add i32 [[TMP141]], 80 +; POST-PROCESS-NEXT: [[TMP202:%.*]] = inttoptr i32 [[TMP201]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP202]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP203]], align 4 +; POST-PROCESS-NEXT: [[TMP204:%.*]] = add i32 [[TMP141]], 84 +; POST-PROCESS-NEXT: [[TMP205:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP205]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr addrspace(21) [[TMP206]], align 4 +; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP141]], 88 +; POST-PROCESS-NEXT: [[TMP208:%.*]] = inttoptr i32 [[TMP207]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP208]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP209]], align 4 +; POST-PROCESS-NEXT: [[TMP210:%.*]] = add i32 [[TMP141]], 92 +; POST-PROCESS-NEXT: [[TMP211:%.*]] = inttoptr i32 
[[TMP210]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP211]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP212]], align 4 +; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP141]], 96 ; POST-PROCESS-NEXT: [[TMP214:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP214]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP51]], ptr addrspace(21) [[TMP215]], align 4 -; POST-PROCESS-NEXT: [[TMP216:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = add i32 [[TMP216]], -120 -; POST-PROCESS-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], 136 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP218]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP219]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP220]], align 4 -; POST-PROCESS-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 140 -; POST-PROCESS-NEXT: [[TMP224:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP224]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(21) [[TMP225]], align 4 -; POST-PROCESS-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 144 -; POST-PROCESS-NEXT: [[TMP229:%.*]] = inttoptr i32 [[TMP228]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP229]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP230]], align 4 -; POST-PROCESS-NEXT: [[TMP231:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 -; POST-PROCESS-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 148 -; POST-PROCESS-NEXT: [[TMP234:%.*]] = inttoptr i32 [[TMP233]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP235:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP234]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP75]], ptr addrspace(21) [[TMP235]], align 4 -; POST-PROCESS-NEXT: [[TMP236:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], -120 -; POST-PROCESS-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], 152 -; POST-PROCESS-NEXT: [[TMP239:%.*]] = inttoptr i32 [[TMP238]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP239]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP240]], align 4 -; POST-PROCESS-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 -; POST-PROCESS-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 156 -; POST-PROCESS-NEXT: [[TMP244:%.*]] = inttoptr i32 [[TMP243]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP244]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP87]], ptr addrspace(21) [[TMP245]], align 4 -; POST-PROCESS-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 -; POST-PROCESS-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 160 -; POST-PROCESS-NEXT: [[TMP249:%.*]] = inttoptr i32 [[TMP248]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP249]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP250]], align 4 -; POST-PROCESS-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 -; 
POST-PROCESS-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 164 -; POST-PROCESS-NEXT: [[TMP254:%.*]] = inttoptr i32 [[TMP253]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP255:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP254]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP99]], ptr addrspace(21) [[TMP255]], align 4 -; POST-PROCESS-NEXT: [[TMP256:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], -120 -; POST-PROCESS-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], 168 -; POST-PROCESS-NEXT: [[TMP259:%.*]] = inttoptr i32 [[TMP258]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP259]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP260]], align 4 -; POST-PROCESS-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP262:%.*]] = add i32 [[TMP261]], -120 -; POST-PROCESS-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 172 -; POST-PROCESS-NEXT: [[TMP264:%.*]] = inttoptr i32 [[TMP263]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP264]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP111]], ptr addrspace(21) [[TMP265]], align 4 -; POST-PROCESS-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 -; POST-PROCESS-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 176 -; POST-PROCESS-NEXT: [[TMP269:%.*]] = inttoptr i32 [[TMP268]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP269]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP270]], align 4 -; POST-PROCESS-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 -; POST-PROCESS-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 180 -; POST-PROCESS-NEXT: [[TMP274:%.*]] = inttoptr 
i32 [[TMP273]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP275:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP274]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP123]], ptr addrspace(21) [[TMP275]], align 4 -; POST-PROCESS-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], -120 -; POST-PROCESS-NEXT: [[TMP278:%.*]] = add i32 [[TMP277]], 184 -; POST-PROCESS-NEXT: [[TMP279:%.*]] = inttoptr i32 [[TMP278]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP280:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP279]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP280]], align 4 -; POST-PROCESS-NEXT: [[TMP281:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP282:%.*]] = add i32 [[TMP281]], -120 -; POST-PROCESS-NEXT: [[TMP283:%.*]] = add i32 [[TMP282]], 188 -; POST-PROCESS-NEXT: [[TMP284:%.*]] = inttoptr i32 [[TMP283]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP285:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP284]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP135]], ptr addrspace(21) [[TMP285]], align 4 -; POST-PROCESS-NEXT: [[TMP286:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP287:%.*]] = add i32 [[TMP286]], -120 -; POST-PROCESS-NEXT: [[TMP288:%.*]] = add i32 [[TMP287]], 192 -; POST-PROCESS-NEXT: [[TMP289:%.*]] = inttoptr i32 [[TMP288]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP290:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP289]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP141]], ptr addrspace(21) [[TMP290]], align 4 -; POST-PROCESS-NEXT: [[TMP291:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP292:%.*]] = add i32 [[TMP291]], -120 -; POST-PROCESS-NEXT: [[TMP293:%.*]] = add i32 [[TMP292]], 196 -; POST-PROCESS-NEXT: [[TMP294:%.*]] = inttoptr i32 [[TMP293]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP295:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP294]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP147]], ptr addrspace(21) [[TMP295]], align 4 -; POST-PROCESS-NEXT: [[TMP296:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP297:%.*]] = add i32 [[TMP296]], -120 -; POST-PROCESS-NEXT: [[TMP298:%.*]] = add i32 [[TMP297]], 200 -; POST-PROCESS-NEXT: [[TMP299:%.*]] = inttoptr i32 [[TMP298]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP300:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP299]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP153]], ptr addrspace(21) [[TMP300]], align 4 -; POST-PROCESS-NEXT: [[TMP301:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP302:%.*]] = add i32 [[TMP301]], -120 -; POST-PROCESS-NEXT: [[TMP303:%.*]] = add i32 [[TMP302]], 204 -; POST-PROCESS-NEXT: [[TMP304:%.*]] = inttoptr i32 [[TMP303]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP305:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP304]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP159]], ptr addrspace(21) [[TMP305]], align 4 -; POST-PROCESS-NEXT: [[TMP306:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP307:%.*]] = add i32 [[TMP306]], -120 -; POST-PROCESS-NEXT: [[TMP308:%.*]] = add i32 [[TMP307]], 208 -; POST-PROCESS-NEXT: [[TMP309:%.*]] = inttoptr i32 [[TMP308]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP310:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP309]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP165]], ptr addrspace(21) [[TMP310]], align 4 -; POST-PROCESS-NEXT: [[TMP311:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP312:%.*]] = add i32 [[TMP311]], -120 -; POST-PROCESS-NEXT: [[TMP313:%.*]] = add i32 [[TMP312]], 212 -; POST-PROCESS-NEXT: [[TMP314:%.*]] = inttoptr i32 [[TMP313]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP315:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP314]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP171]], ptr addrspace(21) [[TMP315]], 
align 4 -; POST-PROCESS-NEXT: [[TMP316:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP317:%.*]] = add i32 [[TMP316]], -120 -; POST-PROCESS-NEXT: [[TMP318:%.*]] = add i32 [[TMP317]], 216 -; POST-PROCESS-NEXT: [[TMP319:%.*]] = inttoptr i32 [[TMP318]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP320:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP319]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP177]], ptr addrspace(21) [[TMP320]], align 4 -; POST-PROCESS-NEXT: [[TMP321:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP322:%.*]] = add i32 [[TMP321]], -120 -; POST-PROCESS-NEXT: [[TMP323:%.*]] = add i32 [[TMP322]], 220 -; POST-PROCESS-NEXT: [[TMP324:%.*]] = inttoptr i32 [[TMP323]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP325:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP324]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP183]], ptr addrspace(21) [[TMP325]], align 4 -; POST-PROCESS-NEXT: [[TMP326:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP327:%.*]] = add i32 [[TMP326]], -120 -; POST-PROCESS-NEXT: [[TMP328:%.*]] = add i32 [[TMP327]], 224 -; POST-PROCESS-NEXT: [[TMP329:%.*]] = inttoptr i32 [[TMP328]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP330:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP329]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP189]], ptr addrspace(21) [[TMP330]], align 4 +; POST-PROCESS-NEXT: store i32 [[TMP125]], ptr addrspace(21) [[TMP215]], align 4 +; POST-PROCESS-NEXT: [[TMP216:%.*]] = add i32 [[TMP141]], 100 +; POST-PROCESS-NEXT: [[TMP217:%.*]] = inttoptr i32 [[TMP216]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP217]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP218]], align 4 +; POST-PROCESS-NEXT: [[TMP219:%.*]] = add i32 [[TMP141]], 104 +; POST-PROCESS-NEXT: [[TMP220:%.*]] = inttoptr i32 [[TMP219]] to ptr addrspace(21) +; POST-PROCESS-NEXT: 
[[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP220]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP221]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; POST-PROCESS-NEXT: [[TMP331:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP331]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: [[TMP222:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 +; POST-PROCESS-NEXT: store i32 [[TMP223]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP224:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP224]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; POST-PROCESS-NEXT: unreachable ; ; @@ -2505,184 +2281,128 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) -; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 108 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP5]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP5]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, 
i64 29) to ptr addrspace(20)), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 120 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP10]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP11]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP12]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr 
addrspace(22) [[TMP13]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP14]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP15]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP10]], 12 ; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP16]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP17]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = add i32 [[TMP10]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP18]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP19]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = add i32 [[TMP10]], 20 ; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP20]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP21]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 132 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = add i32 [[TMP10]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP22]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP23]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = add i32 [[TMP10]], 28 ; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP24]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP25]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = add i32 [[TMP10]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP26]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP27]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = add i32 [[TMP10]], 36 ; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP28]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP29]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = add i32 [[TMP10]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP30]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP31]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP10]], 44 ; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP33]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 144 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] 
= add i32 [[TMP10]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP35]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP10]], 52 ; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP37]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 148 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = add i32 [[TMP10]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP38]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP39]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP10]], 60 ; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP40]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP41]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP10]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP43]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP10]], 68 ; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP44]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP45]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 156 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP10]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP47]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = add i32 [[TMP10]], 76 ; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP48]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP49]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = add i32 [[TMP10]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP50]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP51]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP10]], 84 ; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP10]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP54]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP55]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP10]], 92 ; POST-PROCESS-GLOBAL-NEXT: 
[[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP56]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP57]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 168 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP10]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP59]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP10]], 100 ; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP60]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP61]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = add i32 [[TMP62]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP64]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP65]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = add i32 [[TMP67]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP68]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP69]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 180 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP76]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP77]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP78]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP80]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP81]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = add i32 [[TMP82]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP84]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP85]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP87]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP89]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP93]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP96]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP97]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP100]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP101]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP104]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP105]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP108]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP109]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP116]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP117]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], 108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP120]], i64 [[TMP121]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP10]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @main.resume.0 to i64)) +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP64]], i64 [[TMP65]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; @@ -2695,169 +2415,116 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP29]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP5]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 
[[TMP5]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 ; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP43]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP43]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 ; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP55:%.*]] = add i32 [[TMP5]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 ; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr 
i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP70]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP73]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP79]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 ; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP87]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP85]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP94]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP95]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 92 ; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] ; 
POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) 
[[TMP143]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr 
addrspace(22) [[TMP101]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP104]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -108 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP111]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: ret void ; POST-PROCESS-GLOBAL: entryresume.0.split: ; POST-PROCESS-GLOBAL-NEXT: unreachable @@ -2902,346 +2569,238 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) 
to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP30]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) [[TMP31]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], 124 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP35]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP38]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP39]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, 
ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP29]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP5]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP5]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 ; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP40]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP45]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP46]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = add i32 [[TMP49]], 136 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP50]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP43]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load 
i32, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP5]], 36 ; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP55]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP59]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP60]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP61]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP63]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], 148 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP65]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(22) [[TMP66]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = add i32 [[TMP68]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP69]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 ; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP70]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = add i32 [[TMP73]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP74]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP75]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP76]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP78]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = add i32 [[TMP79]], 160 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP80]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(22) [[TMP81]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP83]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP84]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP73]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP79]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 ; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP85]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = add i32 [[TMP88]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP90]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(22) [[TMP91]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP93]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = add i32 [[TMP94]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP95]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(22) [[TMP96]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP98]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP94]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP95]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 ; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP100]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP101]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = add i32 [[TMP103]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = add i32 [[TMP104]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP105]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(22) [[TMP106]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = add i32 [[TMP108]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = add i32 [[TMP109]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP110]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(22) [[TMP111]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = add i32 [[TMP113]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = add i32 [[TMP114]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP115]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(22) [[TMP116]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = add i32 [[TMP118]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP120]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = load i32, ptr addrspace(22) [[TMP121]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP123]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = add i32 [[TMP124]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP125]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = load i32, ptr addrspace(22) [[TMP126]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = add i32 [[TMP128]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add i32 [[TMP129]], 200 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP130]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = load i32, ptr addrspace(22) [[TMP131]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP133]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = add i32 [[TMP134]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP135]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(22) [[TMP136]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = add i32 [[TMP138]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP139]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP140]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = load i32, ptr addrspace(22) [[TMP141]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = add i32 [[TMP143]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = add i32 [[TMP144]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP145]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = load i32, ptr addrspace(22) [[TMP146]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = add i32 [[TMP148]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP149]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP150]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = load i32, ptr addrspace(22) [[TMP151]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = load i32, 
ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = add i32 [[TMP153]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = add i32 [[TMP154]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP155]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = load i32, ptr addrspace(22) [[TMP156]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = add i32 [[TMP158]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = add i32 [[TMP159]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP160]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = load i32, ptr addrspace(22) [[TMP161]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP163]], i32 0, i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP104]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP109]], i32 0, i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-GLOBAL-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), 
i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr 
addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP166]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = add i32 [[TMP167]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP168]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP32]], ptr addrspace(22) [[TMP169]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = add i32 [[TMP171]], 124 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP172]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP37]], ptr addrspace(22) [[TMP173]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = add i32 [[TMP174]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = add i32 [[TMP175]], 128 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP176]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP177]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP3]], i32 [[TMP180]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP47]], ptr addrspace(22) [[TMP181]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], 136 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP184]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP52]], ptr addrspace(22) [[TMP185]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = add i32 [[TMP187]], 140 -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP188]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) [[TMP189]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = add i32 [[TMP190]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP192]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP62]], ptr addrspace(22) [[TMP193]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = add i32 [[TMP194]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = add i32 [[TMP195]], 148 -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP196]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP67]], ptr addrspace(22) [[TMP197]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = add i32 [[TMP198]], -120 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = add i32 [[TMP199]], 152 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP200]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP201]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP204]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP77]], ptr addrspace(22) [[TMP205]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], 160 -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP208]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP82]], ptr addrspace(22) [[TMP209]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = add i32 [[TMP210]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], 164 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP212]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP213]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = add i32 [[TMP214]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = add i32 [[TMP215]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP216]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP92]], ptr addrspace(22) [[TMP217]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = add i32 [[TMP219]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP220]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP97]], ptr addrspace(22) [[TMP221]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP224]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP225]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP228]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP107]], ptr addrspace(22) [[TMP229]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = add i32 [[TMP230]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP232]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP112]], ptr addrspace(22) [[TMP233]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = add i32 [[TMP234]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = add i32 [[TMP235]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] 
= getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP236]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP117]], ptr addrspace(22) [[TMP237]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = add i32 [[TMP239]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP240]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP122]], ptr addrspace(22) [[TMP241]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = add i32 [[TMP243]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP244]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP127]], ptr addrspace(22) [[TMP245]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP248]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP132]], ptr addrspace(22) [[TMP249]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = add i32 [[TMP250]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP252]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP137]], ptr addrspace(22) [[TMP253]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP255:%.*]] = add i32 [[TMP254]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = add i32 [[TMP255]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP256]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP142]], ptr addrspace(22) [[TMP257]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = add i32 [[TMP259]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP260]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP147]], ptr addrspace(22) [[TMP261]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP264]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP152]], ptr addrspace(22) [[TMP265]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP268]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP157]], ptr addrspace(22) [[TMP269]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = add i32 [[TMP270]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP272]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP162]], ptr 
addrspace(22) [[TMP273]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP112]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = add i32 [[TMP112]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP114]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP33]], ptr addrspace(22) [[TMP115]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP112]], 8 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP116]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP36]], ptr addrspace(22) [[TMP117]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = add i32 [[TMP112]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP118]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP119]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP112]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP120]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP121]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP112]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP122]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP112]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP124]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP48]], ptr addrspace(22) [[TMP125]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP112]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP126]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP51]], ptr addrspace(22) [[TMP127]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = add i32 [[TMP112]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP128]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP129]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add i32 [[TMP112]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP130]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) 
[[TMP131]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP112]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP132]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP60]], ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP112]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP134]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP63]], ptr addrspace(22) [[TMP135]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP112]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP136]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP66]], ptr addrspace(22) [[TMP137]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = add i32 [[TMP112]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP138]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP139]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP112]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP140]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP141]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP112]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP142]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP75]], ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = add i32 [[TMP112]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP144]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP78]], ptr addrspace(22) [[TMP145]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP112]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr 
i8, ptr addrspace(22) [[TMP3]], i32 [[TMP146]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP81]], ptr addrspace(22) [[TMP147]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = add i32 [[TMP112]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP148]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP149]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP112]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP150]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP151]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP112]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP152]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP90]], ptr addrspace(22) [[TMP153]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = add i32 [[TMP112]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP154]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP93]], ptr addrspace(22) [[TMP155]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP112]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP156]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP96]], ptr addrspace(22) [[TMP157]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = add i32 [[TMP112]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP158]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP159]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = add i32 [[TMP112]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP160]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP161]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP112]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP162]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP105]], ptr addrspace(22) [[TMP163]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = add i32 [[TMP112]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP164]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP108]], ptr addrspace(22) [[TMP165]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = bitcast i32 [[TMP274]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP275]], i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = bitcast i32 [[TMP166]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP167]], i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = bitcast i32 [[TMP276]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP277]], i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = bitcast i32 [[TMP168]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x 
float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP169]], i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP278]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP170]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 @@ -3272,8 +2831,8 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP279]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP171]], [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; @@ -3285,354 +2844,244 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 108 +; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 120 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], 108 ; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] ; POST-PROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP6]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast 
(ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) [[TMP31]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = add i32 [[TMP7]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP33]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(22) [[TMP34]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP7]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP37]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP7]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP39]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(22) [[TMP40]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP7]], 16 ; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP43]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = add i32 [[TMP7]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP45]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP46]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = add i32 [[TMP7]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP48]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(22) [[TMP49]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP51:%.*]] = add i32 [[TMP7]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP51]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(22) [[TMP52]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP7]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP54]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(22) [[TMP55]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP7]], 36 ; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP7]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], 
i32 [[TMP60]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP61]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = add i32 [[TMP7]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP63]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(22) [[TMP64]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP7]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP66]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(22) [[TMP67]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = add i32 [[TMP7]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP69]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(22) [[TMP70]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP7]], 56 ; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) 
[[TMP83]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP7]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP75]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP76]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = add i32 [[TMP7]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP78]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP79]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP7]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP81]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(22) [[TMP82]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP7]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP84]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(22) [[TMP85]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP7]], 76 ; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP87]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = add i32 [[TMP7]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP90]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(22) [[TMP91]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = add i32 [[TMP7]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP93]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(22) [[TMP94]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP7]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP96]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(22) [[TMP97]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP7]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP99]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(22) [[TMP100]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP7]], 96 ; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = add i32 [[TMP7]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP105]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(22) [[TMP106]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = add i32 [[TMP7]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP108]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(22) [[TMP109]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP3]], 116 ; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = load i32, 
ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = add i32 [[TMP4]], 116 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP166]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP165]], ptr addrspace(22) [[TMP167]], align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP111]], ptr addrspace(22) [[TMP113]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP170]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, 
[[DX_TYPES_HANDLE]] [[TMP172]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP173]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP116]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP118]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP119]]) ; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) 
@REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: 
store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP176:%.*]] = add i32 [[TMP175]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = add i32 [[TMP176]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP177]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP34]], ptr addrspace(22) [[TMP178]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = add i32 [[TMP179]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = add i32 [[TMP180]], 124 -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP181]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP182]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = add i32 [[TMP183]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = add i32 [[TMP184]], 128 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP185]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP186]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = add i32 [[TMP187]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = add i32 [[TMP188]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP189]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP190]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = add i32 [[TMP191]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = add i32 [[TMP192]], 136 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP193]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr 
addrspace(22) [[TMP194]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = add i32 [[TMP195]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = add i32 [[TMP196]], 140 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP197]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP198]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = add i32 [[TMP199]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = add i32 [[TMP200]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP201]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP64]], ptr addrspace(22) [[TMP202]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = add i32 [[TMP203]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = add i32 [[TMP204]], 148 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP205]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP206]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = add i32 [[TMP207]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = add i32 [[TMP208]], 152 -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP209]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP210]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = add i32 [[TMP211]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = add i32 [[TMP212]], 156 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP213]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP214]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = add i32 [[TMP215]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = add i32 [[TMP216]], 160 -; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP217]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP218]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = add i32 [[TMP219]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = add i32 [[TMP220]], 164 -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP221]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP222]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = add i32 [[TMP223]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = add i32 [[TMP224]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP225]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP94]], ptr addrspace(22) [[TMP226]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = add i32 [[TMP227]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = add i32 [[TMP228]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP229]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP230]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) @REGISTERS, 
align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = add i32 [[TMP231]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = add i32 [[TMP232]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP233]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP234]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = add i32 [[TMP235]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = add i32 [[TMP236]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP237]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP109]], ptr addrspace(22) [[TMP238]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = add i32 [[TMP239]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = add i32 [[TMP240]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP241]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP114]], ptr addrspace(22) [[TMP242]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] = add i32 [[TMP243]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = add i32 [[TMP244]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP245]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr addrspace(22) [[TMP246]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = add i32 [[TMP247]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = add i32 [[TMP248]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP249]] -; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP124]], ptr addrspace(22) [[TMP250]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = add i32 [[TMP251]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = add i32 [[TMP252]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP253]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP129]], ptr addrspace(22) [[TMP254]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = add i32 [[TMP255]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = add i32 [[TMP256]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP257]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP134]], ptr addrspace(22) [[TMP258]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = add i32 [[TMP259]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = add i32 [[TMP260]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP262:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP261]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP139]], ptr addrspace(22) [[TMP262]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = add i32 [[TMP263]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = add i32 [[TMP264]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP265]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP144]], ptr addrspace(22) [[TMP266]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = add i32 [[TMP267]], -120 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = add i32 [[TMP268]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP269]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP149]], ptr addrspace(22) [[TMP270]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = add i32 [[TMP271]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = add i32 [[TMP272]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP273]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP154]], ptr addrspace(22) [[TMP274]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = add i32 [[TMP275]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = add i32 [[TMP276]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP278:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP277]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP159]], ptr addrspace(22) [[TMP278]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP279:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP280:%.*]] = add i32 [[TMP279]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP281:%.*]] = add i32 [[TMP280]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP282:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP281]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP164]], ptr addrspace(22) [[TMP282]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP283:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP284:%.*]] = add i32 [[TMP283]], 120 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP284]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP285:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP286:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) -; 
POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 4, i32 [[TMP285]], i64 [[TMP286]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP121]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP32]], ptr addrspace(22) [[TMP122]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = add i32 [[TMP121]], 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP123]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP35]], ptr addrspace(22) [[TMP124]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = add i32 [[TMP121]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP125]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP38]], ptr addrspace(22) [[TMP126]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP121]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP41]], ptr addrspace(22) [[TMP128]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = add i32 [[TMP121]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP129]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP130]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP121]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP131]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP47]], ptr addrspace(22) [[TMP132]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = add i32 [[TMP121]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP133]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP50]], ptr addrspace(22) [[TMP134]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = add i32 [[TMP121]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP135]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP53]], ptr addrspace(22) [[TMP136]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP121]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] +; POST-PROCESS-GLOBAL-NEXT: store i32 
[[TMP56]], ptr addrspace(22) [[TMP138]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = add i32 [[TMP121]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP139]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP140]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP121]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP141]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP62]], ptr addrspace(22) [[TMP142]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = add i32 [[TMP121]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP143]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP65]], ptr addrspace(22) [[TMP144]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = add i32 [[TMP121]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP145]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP68]], ptr addrspace(22) [[TMP146]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP121]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP71]], ptr addrspace(22) [[TMP148]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = add i32 [[TMP121]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP149]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP150]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP121]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP151]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP77]], ptr addrspace(22) [[TMP152]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = add i32 [[TMP121]], 64 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP154:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP153]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP80]], ptr addrspace(22) [[TMP154]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = add i32 [[TMP121]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP155]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP83]], ptr addrspace(22) [[TMP156]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP121]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP86]], ptr addrspace(22) [[TMP158]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = add i32 [[TMP121]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP159]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP160]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP121]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP161]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP92]], ptr addrspace(22) [[TMP162]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = add i32 [[TMP121]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP163]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP95]], ptr addrspace(22) [[TMP164]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = add i32 [[TMP121]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP165]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP98]], ptr addrspace(22) [[TMP166]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP121]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP167]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP101]], ptr addrspace(22) 
[[TMP168]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = add i32 [[TMP121]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP169]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP170]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP121]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP171]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP107]], ptr addrspace(22) [[TMP172]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = add i32 [[TMP121]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP173]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP110]], ptr addrspace(22) [[TMP174]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @ClosestHit.resume.0 to i64)) +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP175]], i64 [[TMP176]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount !18 ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; @@ -3645,336 +3094,229 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -120 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP30]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP36]], 124 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load 
i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP29]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP5]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 
[[TMP5]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 ; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], 128 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP43]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 136 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP43]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 ; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP55]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP56]], 140 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP61]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP65]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP66]], 148 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP55:%.*]] = add i32 [[TMP5]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 ; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP67]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[TMP70]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 152 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP75]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[TMP76]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr 
i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP80]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP81]], 160 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP70]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP73]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP79]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 ; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP82]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP86]], 164 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP87]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP90]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP92]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP95]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP96]], 172 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP85]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP94]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP95]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 92 ; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP97]] ; 
POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[TMP100]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP101]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP105]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[TMP106]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP107]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP111]], 184 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP101]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP104]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 
4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = add i32 [[TMP4]], 116 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP110]] +; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP111]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP4]], 108 ; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP115]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 [[TMP116]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP117]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(22) [[TMP118]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[TMP120]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP121]], 192 +; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP114]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(22) [[TMP115]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP114]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP116]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP33]], ptr addrspace(22) [[TMP117]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = add i32 [[TMP114]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP118]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP36]], ptr addrspace(22) [[TMP119]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP114]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP120]] +; 
POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP121]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP114]], 16 ; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP122]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(22) [[TMP123]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP125]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP126]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(22) [[TMP128]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP130]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP131]], 200 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP114]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP124]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP125]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP114]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP126]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP48]], ptr addrspace(22) [[TMP127]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = add i32 [[TMP114]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP128]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP51]], ptr addrspace(22) [[TMP129]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add i32 [[TMP114]], 32 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP130]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP131]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP114]], 36 ; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(22) [[TMP133]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP136]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = load i32, ptr addrspace(22) [[TMP138]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP140]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP141]], 208 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP114]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP134]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP60]], ptr addrspace(22) [[TMP135]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP114]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP136]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP63]], ptr addrspace(22) [[TMP137]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = add i32 [[TMP114]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP138]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP66]], ptr addrspace(22) 
[[TMP139]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP114]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP140]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP141]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP114]], 56 ; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = load i32, ptr addrspace(22) [[TMP143]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP145]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP146]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = load i32, ptr addrspace(22) [[TMP148]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP150]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP151]], 216 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = add i32 [[TMP114]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP144]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP75]], ptr addrspace(22) [[TMP145]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP114]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP146]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP78]], ptr addrspace(22) [[TMP147]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = add i32 [[TMP114]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP148]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP81]], ptr addrspace(22) [[TMP149]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP114]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP150]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP151]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = add i32 [[TMP114]], 76 ; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP152]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = load i32, ptr addrspace(22) [[TMP153]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP155]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP156]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = load i32, ptr addrspace(22) [[TMP158]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP160]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP161]], 224 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP153]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = add i32 [[TMP114]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP154]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP90]], ptr addrspace(22) [[TMP155]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP114]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP156]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP93]], ptr addrspace(22) [[TMP157]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = add i32 [[TMP114]], 
88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP158]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP96]], ptr addrspace(22) [[TMP159]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = add i32 [[TMP114]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP160]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP161]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP114]], 96 ; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP162]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = load i32, ptr addrspace(22) [[TMP163]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = add i32 [[TMP6]], 116 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP165]] -; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP166]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP6]], 108 -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP167]] -; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP168]], align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 10) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 11) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 12) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 13) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 14) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr 
addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 15) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 16) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 17) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 18) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 19) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 20) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 21) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 22) 
to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 23) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 24) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 25) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 26) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 27) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 28) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 29) to ptr addrspace(20)), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] 
= load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = add i32 [[TMP169]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP170]], 120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP171]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP34]], ptr addrspace(22) [[TMP172]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = add i32 [[TMP173]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = add i32 [[TMP174]], 124 -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP175]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP176]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP177:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP178:%.*]] = add i32 [[TMP177]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP179:%.*]] = add i32 [[TMP178]], 128 -; POST-PROCESS-GLOBAL-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP179]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP180]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP181:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP182:%.*]] = add i32 [[TMP181]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP183:%.*]] = add i32 [[TMP182]], 132 -; POST-PROCESS-GLOBAL-NEXT: [[TMP184:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP183]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP184]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP185:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP186:%.*]] = add i32 [[TMP185]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP187:%.*]] = add i32 [[TMP186]], 136 -; POST-PROCESS-GLOBAL-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP2]], i32 [[TMP187]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP188]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP189:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP190:%.*]] = add i32 [[TMP189]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP191:%.*]] = add i32 [[TMP190]], 140 -; POST-PROCESS-GLOBAL-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP191]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP192]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP193:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP194:%.*]] = add i32 [[TMP193]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP195:%.*]] = add i32 [[TMP194]], 144 -; POST-PROCESS-GLOBAL-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP195]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP64]], ptr addrspace(22) [[TMP196]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP197:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP198:%.*]] = add i32 [[TMP197]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP199:%.*]] = add i32 [[TMP198]], 148 -; POST-PROCESS-GLOBAL-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP199]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP200]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP201:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP202:%.*]] = add i32 [[TMP201]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP203:%.*]] = add i32 [[TMP202]], 152 -; POST-PROCESS-GLOBAL-NEXT: [[TMP204:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP203]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP204]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP205:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP206:%.*]] = add i32 [[TMP205]], -120 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP207:%.*]] = add i32 [[TMP206]], 156 -; POST-PROCESS-GLOBAL-NEXT: [[TMP208:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP207]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP208]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP209:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP210:%.*]] = add i32 [[TMP209]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP211:%.*]] = add i32 [[TMP210]], 160 -; POST-PROCESS-GLOBAL-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP211]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP212]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP213:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP214:%.*]] = add i32 [[TMP213]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP215:%.*]] = add i32 [[TMP214]], 164 -; POST-PROCESS-GLOBAL-NEXT: [[TMP216:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP215]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP216]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP217:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP218:%.*]] = add i32 [[TMP217]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP219:%.*]] = add i32 [[TMP218]], 168 -; POST-PROCESS-GLOBAL-NEXT: [[TMP220:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP219]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP94]], ptr addrspace(22) [[TMP220]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP221:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP222:%.*]] = add i32 [[TMP221]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], 172 -; POST-PROCESS-GLOBAL-NEXT: [[TMP224:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP223]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP224]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP225:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP226:%.*]] = add i32 [[TMP225]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP227:%.*]] = add i32 [[TMP226]], 176 -; POST-PROCESS-GLOBAL-NEXT: [[TMP228:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP227]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP228]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP229:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP230:%.*]] = add i32 [[TMP229]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP231:%.*]] = add i32 [[TMP230]], 180 -; POST-PROCESS-GLOBAL-NEXT: [[TMP232:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP231]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP109]], ptr addrspace(22) [[TMP232]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP233:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP234:%.*]] = add i32 [[TMP233]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP235:%.*]] = add i32 [[TMP234]], 184 -; POST-PROCESS-GLOBAL-NEXT: [[TMP236:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP235]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP114]], ptr addrspace(22) [[TMP236]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP238:%.*]] = add i32 [[TMP237]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP239:%.*]] = add i32 [[TMP238]], 188 -; POST-PROCESS-GLOBAL-NEXT: [[TMP240:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP239]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP119]], ptr addrspace(22) [[TMP240]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP242:%.*]] = add i32 [[TMP241]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP243:%.*]] = add i32 [[TMP242]], 192 -; POST-PROCESS-GLOBAL-NEXT: [[TMP244:%.*]] 
= getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP243]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP124]], ptr addrspace(22) [[TMP244]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP245:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP246:%.*]] = add i32 [[TMP245]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP247:%.*]] = add i32 [[TMP246]], 196 -; POST-PROCESS-GLOBAL-NEXT: [[TMP248:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP247]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP129]], ptr addrspace(22) [[TMP248]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP249:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP250:%.*]] = add i32 [[TMP249]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP251:%.*]] = add i32 [[TMP250]], 200 -; POST-PROCESS-GLOBAL-NEXT: [[TMP252:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP251]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP134]], ptr addrspace(22) [[TMP252]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP253:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP254:%.*]] = add i32 [[TMP253]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP255:%.*]] = add i32 [[TMP254]], 204 -; POST-PROCESS-GLOBAL-NEXT: [[TMP256:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP255]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP139]], ptr addrspace(22) [[TMP256]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP257:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP258:%.*]] = add i32 [[TMP257]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP259:%.*]] = add i32 [[TMP258]], 208 -; POST-PROCESS-GLOBAL-NEXT: [[TMP260:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP259]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP144]], ptr addrspace(22) [[TMP260]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP262:%.*]] = add i32 [[TMP261]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP263:%.*]] = add i32 [[TMP262]], 212 -; POST-PROCESS-GLOBAL-NEXT: [[TMP264:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP263]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP149]], ptr addrspace(22) [[TMP264]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP265:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP266:%.*]] = add i32 [[TMP265]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP267:%.*]] = add i32 [[TMP266]], 216 -; POST-PROCESS-GLOBAL-NEXT: [[TMP268:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP267]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP154]], ptr addrspace(22) [[TMP268]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP269:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP270:%.*]] = add i32 [[TMP269]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP271:%.*]] = add i32 [[TMP270]], 220 -; POST-PROCESS-GLOBAL-NEXT: [[TMP272:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP271]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP159]], ptr addrspace(22) [[TMP272]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP273:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP274:%.*]] = add i32 [[TMP273]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP275:%.*]] = add i32 [[TMP274]], 224 -; POST-PROCESS-GLOBAL-NEXT: [[TMP276:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP275]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP164]], ptr addrspace(22) [[TMP276]], align 4 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP163]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = add i32 [[TMP114]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP164]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP105]], ptr addrspace(22) [[TMP165]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] 
= add i32 [[TMP114]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP166]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP108]], ptr addrspace(22) [[TMP167]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP277:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP277]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = add i32 [[TMP168]], -120 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP169]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP170]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] ; POST-PROCESS-GLOBAL-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/remat-intrinsic.ll b/llvmraytracing/test/dx/remat-intrinsic.ll index 1d45c498b4..c2e5314d43 100644 --- a/llvmraytracing/test/dx/remat-intrinsic.ll +++ b/llvmraytracing/test/dx/remat-intrinsic.ll @@ -131,8 +131,9 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr 
i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 @@ -147,11 +148,8 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 8 -; POSTPROCESS-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @called.resume.0 to i64)) +; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount !14 ; POSTPROCESS-NEXT: unreachable ; ; @@ -164,29 +162,26 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 -; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; POSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 ; POSTPROCESS-NEXT: [[HANDLE011:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POSTPROCESS-NEXT: [[HANDLE110:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[HANDLE011]]) ; POSTPROCESS-NEXT: [[HANDLE29:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[HANDLE110]], 
[[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED7:%.*]] = call [[DX_TYPES_FOURI32:%.*]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I8]]) -; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[I6:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I6:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED5:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I6]]) -; POSTPROCESS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[I4:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I4:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED3:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I4]]) -; POSTPROCESS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[I2:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[I2:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED1:%.*]] = call [[DX_TYPES_FOURI32]] @dx.op.unpack4x8.i32(i32 219, i8 1, i32 [[I2]]) ; POSTPROCESS-NEXT: [[A:%.*]] = extractvalue 
[[DX_TYPES_FOURI32]] [[UNPACKED7]], 0 ; POSTPROCESS-NEXT: [[B:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED5]], 1 @@ -196,6 +191,9 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[HANDLE29]], i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, i8 15) ; POSTPROCESS-NEXT: store i32 [[PACKED]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 +; POSTPROCESS-NEXT: store i32 [[TMP11]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; POSTPROCESS-NEXT: unreachable diff --git a/llvmraytracing/test/dx/traceray.ll b/llvmraytracing/test/dx/traceray.ll index 3f1dbad7d0..9a465cd738 100644 --- a/llvmraytracing/test/dx/traceray.ll +++ b/llvmraytracing/test/dx/traceray.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: grep -v lgc.cps.module %s | grep -v SKIP_GLOBAL_ADDRSPACE | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: count 0 < %t0.stderr -; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: grep -v lgc.cps.module %s | grep -v SKIP_GLOBAL_ADDRSPACE | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t1.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; RUN: count 0 < %t1.stderr +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t2.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS-GLOBAL %s +; RUN: count 0 < %t2.stderr +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 2> %t3.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s +; RUN: count 0 < %t3.stderr +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S 2> %t4.stderr | FileCheck -check-prefix=DXILCONTPOSTPROCESS-CPS %s +; RUN: count 0 < %t4.stderr target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-v16:32-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -19,6 +25,8 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.RaytracingAccelerationStructure = type { i32 } %"class.RWTexture2D >" = type { <4 x float> } +@debug_global = external global i32 + @"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 @@ -34,8 +42,14 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.TraversalData @_AmdAwaitAnyHit(i64, %struct.TraversalData, float, i32) #0 +declare void @continuation.continue(i64, ...) #0 + +declare void @_AmdContStackSetPtr(i32) #0 + declare !types !32 i32 @_cont_HitKind(%struct.SystemData*) #0 +declare i64 @_cont_GetContinuationStackGlobalMemBase() ; SKIP_GLOBAL_ADDRSPACE + ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #1 @@ -96,6 +110,13 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ ret void } +define void @_cont_KernelEntry() #0 !lgc.rt.shaderstage !69 { + %cspInit = ptrtoint ptr @debug_global to i32 + call void @_AmdContStackSetPtr(i32 %cspInit) + call void (i64, ...) 
@continuation.continue(i64 0, %struct.DispatchSystemData poison) + ret void +} + ; Function Attrs: alwaysinline define i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) #2 !types !45 { %doanyhit = fcmp fast ogt float %t, 0.000000e+00 @@ -298,6 +319,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !dx.resources = !{!3} !dx.typeAnnotations = !{!10} !dx.entryPoints = !{!18, !20, !23, !25, !27, !29, !31} +!lgc.cps.module = !{} +!continuation.stackAddrspace = !{!70} ; SKIP_GLOBAL_ADDRSPACE !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} !1 = !{i32 1, i32 6} @@ -368,6 +391,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !66 = !{i32 2} !67 = !{i32 1} !68 = !{i32 4} +!69 = !{i32 7} +!70 = !{i32 22} + ; LOWERRAYTRACINGPIPELINE-LABEL: define i1 @_cont_IsEndSearch( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() @@ -393,6 +419,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 ; ; +; LOWERRAYTRACINGPIPELINE-LABEL: define void @_cont_KernelEntry( +; LOWERRAYTRACINGPIPELINE-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_AmdContStackSetPtr(i32 [[CSPINIT]]) +; LOWERRAYTRACINGPIPELINE-NEXT: call void (i64, ...) 
@continuation.continue(i64 0, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void +; +; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.HitData @_cont_GetCandidateState( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 @@ -408,7 +442,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META14:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META36:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -419,8 +453,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 @@ -428,37 +462,31 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR2:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 
0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], 
!continuation.wait.await [[META14]], !continuation.returnedRegistercount !34 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] @await.struct.DispatchSystemData(ptr [[TMP21]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13]], !continuation.returnedRegistercount !33 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_STRUCT_DISPATCHSYSTEMDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP21]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -469,19 +497,19 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> 
@lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META33:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META32:![0-9]+]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META41:![0-9]+]] { +; 
LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META41:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -489,30 +517,24 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -528,29 +550,25 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float 1.000000e+00, i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP31]], ptr [[TMP32]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP33]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[TMP33]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP37]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP37]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = 
getelementptr i32, ptr [[TMP37]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]], !continuation.registercount [[META33]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META43:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] 
= alloca [[STRUCT_HITDATA]], align 8 @@ -564,31 +582,25 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP15]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP22]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP28]], ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 @@ -616,32 +628,26 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = fadd fast float [[TMP36]], [[EXTRACT1]] ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP38:%.*]] = fcmp fast ogt float [[TMP37]], 0.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP38]], label [[TMP39:%.*]], label [[TMP61:%.*]] -; LOWERRAYTRACINGPIPELINE: 39: +; LOWERRAYTRACINGPIPELINE: 34: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP30]], ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP40]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP41]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP42]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[TMP41]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[TMP45]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[TMP45]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP45]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[TMP45]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP52]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr [[TMP53]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr 
[[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP55]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 @@ -649,32 +655,26 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP59]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP58]], ptr [[ADDR_I1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP60]], !continuation.registercount [[META34]] -; LOWERRAYTRACINGPIPELINE: 61: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP60]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE: 51: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP30]], ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[TMP62]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[TMP63]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[TMP62]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[TMP66]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP66]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = 
getelementptr i32, ptr [[TMP66]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr [[TMP74]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP76]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr [[TMP77]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 @@ -682,11 +682,11 @@ attributes #6 = { nocallback nofree nosync 
nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP80]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP79]], ptr [[ADDR_I2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP81]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP81]], !continuation.registercount [[META33]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META32]] !continuation [[META45:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 @@ -706,19 +706,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: anyhit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] @await.struct.TraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] [[AWAIT_STRUCT_TRAVERSALDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: accepthit.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, 
ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 @@ -729,17 +727,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP20:%.*]], label [[TMP22:%.*]] -; LOWERRAYTRACINGPIPELINE: 20: +; LOWERRAYTRACINGPIPELINE: 18: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP21]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 22: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP21]], !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE: 20: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP23]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP23]], !continuation.registercount [[META32]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyIntersectionShaderLargeAttrs( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META33]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META32]] !continuation 
[[META46:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]], align 4 @@ -773,36 +771,34 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: anyhit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] @await.struct.TraversalData(ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] [[AWAIT_STRUCT_TRAVERSALDATA:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: 
accepthit.i: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr getelementptr inbounds ([30 x i32], ptr @PAYLOAD, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr getelementptr inbounds ([30 x i32], ptr @PAYLOAD, i32 0, 
i64 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr getelementptr inbounds ([30 x i32], ptr @PAYLOAD, i32 0, i64 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 5 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr getelementptr inbounds ([30 x i32], ptr @PAYLOAD, i32 0, i64 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 6 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr getelementptr inbounds ([30 x i32], ptr @PAYLOAD, i32 0, i64 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], 
ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP29]], i32 0, i32 1 @@ -811,59 +807,51 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; LOWERRAYTRACINGPIPELINE: 30: +; LOWERRAYTRACINGPIPELINE: 28: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP31]], !continuation.registercount [[META33]] -; LOWERRAYTRACINGPIPELINE: 32: +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP31]], !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE: 30: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP33]], !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_TRAVERSALDATA]] [[TMP33]], !continuation.registercount [[META32]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META48:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP20]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP20]], i64 2 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META34]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], !continuation.registercount [[META33]] ; ; ; DXILCONTPOSTPROCESS-LABEL: define i1 @_cont_IsEndSearch( @@ -891,6 +879,16 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: ret i32 5 ; ; +; DXILCONTPOSTPROCESS-LABEL: define void @_cont_KernelEntry( +; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 0, i32 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-NEXT: ret void +; +; ; DXILCONTPOSTPROCESS-LABEL: define %struct.HitData @_cont_GetCandidateState( ; DXILCONTPOSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 @@ -906,18 +904,18 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyRayGen( -; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META36:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] @_cont_SetupRayGen() +; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr 
@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 @@ -929,20 +927,20 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP8]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !33 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyRayGen.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META35]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META36]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 @@ -951,13 +949,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr 
addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 @@ -967,8 +965,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP11]], i8 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP12]], 
[[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP12]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -980,7 +978,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyClosestHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META37:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META38:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -989,13 +987,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; 
DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 @@ -1022,13 +1020,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) addrspacecast 
(ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP25]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] @@ -1036,7 +1034,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyAnyHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META39:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META40:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -1071,13 +1069,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float ; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 @@ -1146,13 +1144,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; 
DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = bitcast i32 [[TMP30]] to float @@ -1201,13 +1199,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT18]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT21]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(20) addrspacecast (ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT24:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT24]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_0_I32S]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = bitcast i32 [[TMP41]] to float @@ -1252,13 +1250,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.stacksize [[META42:![0-9]+]] !continuation.state [[META42]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41:![0-9]+]] 
!continuation.registercount [[META32:![0-9]+]] !continuation [[META42:![0-9]+]] !continuation.stacksize [[META43:![0-9]+]] !continuation.state [[META43]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 @@ -1287,26 +1286,23 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP12]], i32 1 +; 
DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP10]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT64:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_065_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT64]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP13:%.*]], label [[TMP15:%.*]] -; DXILCONTPOSTPROCESS: 13: +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP11:%.*]], label [[TMP15:%.*]] +; DXILCONTPOSTPROCESS: 11: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1315,6 +1311,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable @@ -1328,20 +1327,20 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP17]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP18]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META41]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META32]] !continuation [[META42]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 @@ -1352,11 +1351,11 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 5: -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP3:%.*]], label [[TMP9:%.*]] +; DXILCONTPOSTPROCESS: 3: +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1365,11 +1364,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], 
align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 9: -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -1381,18 +1383,22 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShaderLargeAttrs( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META42]] !continuation.state [[META42]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META32]] !continuation [[META44:![0-9]+]] !continuation.stacksize [[META43]] !continuation.state [[META43]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 @@ -1427,27 +1433,24 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue 
[[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 100 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 101 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP10]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 102, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 1) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 103, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 2) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 104, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 3) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 105, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 4) to ptr addrspace(20)), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 106, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([30 x i32], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i64 5) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 100 to float +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 101 to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP8]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 102, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 103, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 104, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 3), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 105, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 4), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 106, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 5), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_070_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP11:%.*]], label [[TMP13:%.*]] -; DXILCONTPOSTPROCESS: 11: +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP9:%.*]], label [[TMP13:%.*]] +; DXILCONTPOSTPROCESS: 9: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float 
[[DOTFCA_1_0_EXTRACT]], 1, 0 @@ -1456,6 +1459,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable @@ -1469,20 +1475,20 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META32]] !continuation [[META44]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -8 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 @@ -1493,11 +1499,11 @@ attributes #6 = { nocallback nofree 
nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() -; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 5: -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP7]], align 4 +; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP3:%.*]], label [[TMP9:%.*]] +; DXILCONTPOSTPROCESS: 3: +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 @@ -1506,11 +1512,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 9: -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP4]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -1522,12 +1531,15 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyMissShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META45:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META46:![0-9]+]] !continuation.state [[META22]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1536,13 +1548,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), 
i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_4_MISS_IN]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -1551,15 +1563,2498 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = 
extractelement <4 x float> , i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) addrspacecast (ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT:%.*]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i32 7) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 8) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) addrspacecast (ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_6_MISS_OUT]], ptr addrspacecast (ptr addrspace(20) @REGISTERS to ptr), i32 0, i32 0, i64 9) to ptr addrspace(20)), align 4 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 ; 
DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] ; DXILCONTPOSTPROCESS-NEXT: unreachable ; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define i1 @_cont_IsEndSearch( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret i1 [[ISEND]] +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], ptr [[ADDR]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]] +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @_cont_SetTriangleHitAttributes( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret void +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define i32 @_cont_GetLocalRootIndex( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret i32 5 +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @_cont_KernelEntry( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 0, i32 [[TMP3]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret void +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define %struct.HitData @_cont_GetCandidateState( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define float @_cont_RayTCurrent( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret float [[RES]] +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyRayGen( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META36:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP0:%.*]] = call i32 @_cont_GetContinuationStackAddr() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP0]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] 
[[DIS_DATA_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP8]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void 
(i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP13]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], !continuation.returnedRegistercount !34 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyRayGen.resume.0( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META34]] !continuation [[META36]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), 
align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP13]], i8 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP15]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP16]], float [[TMP17]], float [[TMP18]], float [[TMP19]], i8 15) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret void +; DXILCONTPOSTPROCESS-GLOBAL: entryresume.0.split: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyClosestHitShader( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META37:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META38:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 
+; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = fsub fast float 1.000000e+00, [[TMP15]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = fsub fast float [[TMP16]], [[TMP17]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = insertelement <4 x float> undef, float [[TMP18]], i64 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP15]], i64 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP17]], i64 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float 1.000000e+00, i64 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP27]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyAnyHitShader( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META39:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META40:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <2 x float> [[DOTFCA_0_1_0_EXTRACT]], ptr [[DOTFCA_0_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store float [[DOTFCA_1_0_EXTRACT]], ptr [[DOTFCA_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <3 x float> [[DOTFCA_2_EXTRACT]], ptr [[DOTFCA_2_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <3 x float> [[DOTFCA_3_EXTRACT]], ptr [[DOTFCA_3_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store float [[DOTFCA_4_EXTRACT]], ptr [[DOTFCA_4_GEP]], align 4 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[DOTFCA_5_EXTRACT]], ptr [[DOTFCA_5_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP10]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP12]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP14]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP15]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_060_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_060_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_060_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_060_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 0 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I4_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I4_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I4_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_0_INSERT]], i32 [[RES_I4_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store float [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP18]], ptr [[TMP2]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP19]], i8 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[RES_I_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP20]], ptr [[TMP3]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP21]], i8 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I6_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I6_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I6_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_0_INSERT]], i32 [[RES_I6_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = fadd fast float [[TMP23]], [[EXTRACT1]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], 0.000000e+00 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP38:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 26: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP27]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP31]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_062_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP33]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = 
bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_062_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_062_0_VEC_INSERT]], float [[TMP35]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_062_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP36]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT25:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT25]], ptr [[DOTFCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP26]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP27]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP28:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP28]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP29:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP29]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_GEP30:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP30]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_LOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_GEP31:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP31]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_LOAD]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_GEP32:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_LOAD:%.*]] = load float, ptr [[DOTFCA_4_GEP32]], align 4 
+; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_LOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP33:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP33]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_LOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP37]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: 38: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT15]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT18]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT21]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT24:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT24]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = bitcast i32 [[TMP43]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_066_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP44]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = bitcast i32 [[TMP45]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_066_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_066_0_VEC_INSERT]], float [[TMP46]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT65:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_066_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP47]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT34:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT65]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_GEP35:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I2]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT34]], ptr [[DOTFCA_0_GEP35]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_GEP36:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_LOAD37:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP36]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT38:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD37]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP39:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD40:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP39]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT41:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT38]], <2 x float> [[DOTFCA_0_1_0_LOAD40]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP42:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD43:%.*]] = load float, ptr [[DOTFCA_1_0_GEP42]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT44:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT41]], float [[DOTFCA_1_0_LOAD43]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP45:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD46:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP45]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_1_1_INSERT47:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT44]], i32 [[DOTFCA_1_1_LOAD46]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_GEP48:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_LOAD49:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP48]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT50:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT47]], <3 x float> [[DOTFCA_2_LOAD49]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_GEP51:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_LOAD52:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP51]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT53:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT50]], <3 x float> [[DOTFCA_3_LOAD52]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_GEP54:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_LOAD55:%.*]] = load float, ptr [[DOTFCA_4_GEP54]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT56:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT53]], float [[DOTFCA_4_LOAD55]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP57:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_LOAD58:%.*]] = load i64, ptr [[DOTFCA_5_GEP57]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT59:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT56]], i64 [[DOTFCA_5_LOAD58]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP48]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyIntersectionShader( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41:![0-9]+]] !continuation.registercount [[META33:![0-9]+]] !continuation [[META42:![0-9]+]] !continuation.stacksize [[META43:![0-9]+]] !continuation.state [[META43]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: anyhit.i: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 
[[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: accepthit.i: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP11]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT64:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_065_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT64]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP12:%.*]], label [[TMP16:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 12: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP15]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: 16: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP19]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META33]] !continuation [[META42]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP10:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 5: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(22) [[TMP6]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT16]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT18]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
store i32 [[TMP8]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: 10: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP11]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT16]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT18]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyIntersectionShaderLargeAttrs( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META33]] !continuation [[META44:![0-9]+]] !continuation.stacksize [[META43]] !continuation.state [[META43]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: anyhit.i: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] 
= insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]] poison, i32 100, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_0_INSERT]], i32 101, 0, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_1_INSERT]], i32 102, 0, 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_2_INSERT]], i32 103, 0, 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] 
[[DOTFCA_0_5_INSERT]], i32 106, 0, 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount !33 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: accepthit.i: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 100 to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast i32 101 to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP9]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 102, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 103, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 104, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 3), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 105, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 4), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 106, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 5), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> 
[[DOTSROA_070_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP10:%.*]], label [[TMP14:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 10: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: 14: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META41]] !continuation.registercount [[META33]] !continuation [[META44]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 
+; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP10:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 5: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(22) [[TMP6]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT16]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT18]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; DXILCONTPOSTPROCESS-GLOBAL: 10: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP11]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT12]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT14]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT16]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT18]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyMissShader( +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META45:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META46:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP15]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i1 @_cont_IsEndSearch( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i1 [[ISEND]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], ptr [[ADDR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR:%.*]] = 
getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @_cont_KernelEntry( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_AmdContStackSetPtr(i32 [[CSPINIT]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 0, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define float @_cont_RayTCurrent( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret float [[RES]] +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( 
+; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META36:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA37:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR2:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], 
i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } (...) @lgc.cps.await__sl_s_struct.DispatchSystemDatasa21i32a10i32s(i32 4, i32 4, i64 -1, i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[TMP27]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13:![0-9]+]], !continuation.returnedRegistercount !33 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP28]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP29]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP28]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP44]], ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP30]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: .split: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA37]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP49]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP50]], i8 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP51]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = extractelement <4 x float> [[TMP48]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP48]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP55:%.*]] = extractelement <4 x float> [[TMP48]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = extractelement <4 x float> [[TMP48]], i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP52]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP53]], float [[TMP54]], float [[TMP55]], float [[TMP56]], i8 15) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyClosestHitShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr [[TMP3]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[HITATTRS]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[TMP27]], i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = fsub fast float 1.000000e+00, [[TMP28]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = fsub fast float [[TMP29]], [[TMP30]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = insertelement <4 x float> undef, float [[TMP31]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP28]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP30]], i64 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float 1.000000e+00, i64 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP35]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP38]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP39]], ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP50]], ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP53]], ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP54]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP55]], [21 x i32] poison, [10 x i32] [[TMP56]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyAnyHitShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META43:![0-9]+]] !lgc.cps [[META40]] !continuation [[META44:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] 
[[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP25]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP6]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP24]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I4:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I4]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP35]], ptr [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP36]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], 
ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP37]], ptr [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP38]], i8 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I6:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I6]], ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I7:%.*]] = load float, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = fmul fast float [[RES_I7]], [[EXTRACT]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = fadd fast float [[TMP40]], [[EXTRACT1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = fcmp fast ogt float [[TMP41]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP42]], label [[TMP43:%.*]], label [[TMP72:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 35: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP44]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr 
inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP46]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP49]], ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP57]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP50]], ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP47]], ptr [[TMP59]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP48]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP65]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP53]], ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = load 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP69]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP68]], ptr [[ADDR_I1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP71:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP70]], [8 x i32] poison, [10 x i32] [[TMP71]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 56: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP73]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP58]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[TMP73]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP80]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP61]], ptr [[TMP79]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[TMP80]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP64]], ptr [[TMP84]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[TMP80]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP88]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP67]], ptr [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP74]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP93]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP75]], ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP97]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP96]], ptr [[ADDR_I2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 20, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP98]], [8 x i32] poison, [10 x i32] [[TMP99]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45:![0-9]+]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP1]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I1:%.*]] = load float, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP3]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I1]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: anyhit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } (...) 
@lgc.cps.await__sl_s_struct.TraversalDatasa8i32a30i32s(i32 3, i32 8, i32 5, float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], [20 x i32] poison, [30 x i32] [[TMP8]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: accepthit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP21]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP20]], ptr [[ADDR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP22:%.*]], label [[TMP25:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 20: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP23]], [8 x i32] poison, [30 x i32] [[TMP24]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 23: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP26]], [8 x i32] poison, [30 x i32] [[TMP27]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyIntersectionShaderLargeAttrs( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META45]] !continuation [[META47:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP1]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RES_I1:%.*]] = load float, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR0:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 100, ptr [[PTR0]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR1:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 101, ptr [[PTR1]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR2:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 102, ptr [[PTR2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR3:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 103, ptr [[PTR3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR4:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 104, ptr [[PTR4]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR5:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 105, ptr [[PTR5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PTR6:%.*]] = getelementptr [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], i32 0, i32 0, i32 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 106, ptr [[PTR6]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP3]] to ptr +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I1]], 0.000000e+00 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; 
LOWERRAYTRACINGPIPELINE-CPS: anyhit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } (...) @lgc.cps.await__sl_s_struct.TraversalDatasa8i32a30i32s(i32 3, i32 8, i32 5, float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP7]], [15 x i32] poison, [30 x i32] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: accepthit.i: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr 
[[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP22]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP36]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP35]], ptr [[ADDR_I]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT]] +; LOWERRAYTRACINGPIPELINE-CPS: _cont_ReportHit.exit: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP37:%.*]], label [[TMP40:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 34: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP38]], [8 x i32] poison, [30 x i32] [[TMP39]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 37: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR]], i32 4, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP41]], [8 x i32] poison, [30 x i32] [[TMP42]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META45]] !lgc.cps [[META41]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP22]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr 
[[TMP26]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP36]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR]], i32 3, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP38]], [21 x i32] poison, [10 x i32] [[TMP39]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define i1 @_cont_IsEndSearch( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-CPS-NEXT: ret i1 [[ISEND]] +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], ptr [[ADDR]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: ret [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]] +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ADDR:%.*]] = 
getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: ret void +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: ret i32 5 +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @_cont_KernelEntry( +; DXILCONTPOSTPROCESS-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 0, i32 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-CPS-NEXT: ret void +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define float @_cont_RayTCurrent( +; DXILCONTPOSTPROCESS-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: ret float 
[[RES]] +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyRayGen( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META36:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[_CONT_SETUPRAYGEN:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] 
[[DIS_DATA_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP8]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP9]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP10]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP11]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP12]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyRayGen.resume.0 to i64)) +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP13]], i64 [[TMP14]], i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount !33 +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [21 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META22]] !continuation [[META36]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, align 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP12]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP13]], i8 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP14]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP15]], i8 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP16]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP17]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP18]], float [[TMP19]], float [[TMP20]], float [[TMP21]], i8 15) +; DXILCONTPOSTPROCESS-CPS-NEXT: ret void +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyClosestHitShader( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META37:![0-9]+]] !lgc.cps [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = fsub fast float 1.000000e+00, [[TMP8]] +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = fsub fast float [[TMP9]], [[TMP10]] +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i64 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP8]], i64 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i64 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float 1.000000e+00, i64 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP16]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP17]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP18]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP19]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP21]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP22]], i32 [[TMP23]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyAnyHitShader( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !lgc.cps [[META37]] !continuation [[META41:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 
[[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_2_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_3_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store float [[SYSTEM_DATA_FCA_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_4_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_5_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP3]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP4]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP5]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP6]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_099_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_099_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_099_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_099_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I4_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I4_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I4_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_0_INSERT]], i32 [[RES_I4_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP0]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store float [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP0]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 
[[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP9]], ptr [[TMP0]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP10]], i8 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[RES_I_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 
0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP11]], ptr [[TMP1]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP12]], i8 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I6_FCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I6_FCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I6_FCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_0_INSERT]], i32 [[RES_I6_FCA_1_LOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP14]], [[EXTRACT1]] +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP15]], 0.000000e+00 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP32:%.*]] +; DXILCONTPOSTPROCESS-CPS: 17: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0102_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP26:%.*]] = bitcast 
i32 [[TMP25]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0102_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0102_0_VEC_INSERT]], float [[TMP26]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT101:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0102_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP27]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT101]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT69:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_LOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT70:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT69]], <3 x float> [[DOTFCA_3_LOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_LOAD:%.*]] = load float, ptr [[DOTFCA_4_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT71:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT70]], float [[DOTFCA_4_LOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_GEP:%.*]] = getelementptr inbounds 
[[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT72:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT71]], i64 [[DOTFCA_5_LOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP29]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[CSP]], align 4 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP30]], i32 [[TMP31]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT72]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: 32: +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT15]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT18]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP35:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT21]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT24:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT24]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP37:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP38:%.*]] = bitcast i32 [[TMP37]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0106_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP38]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[TMP39:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP40:%.*]] = bitcast i32 [[TMP39]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0106_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0106_0_VEC_INSERT]], float [[TMP40]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT105:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0106_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP41]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT73:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT105]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_GEP74:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I2]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT73]], ptr [[DOTFCA_0_GEP74]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_GEP75:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_LOAD76:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP75]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT77:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD76]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_GEP78:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_LOAD79:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP78]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT80:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_0_0_0_INSERT77]], <2 x float> [[DOTFCA_0_1_0_LOAD79]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_GEP81:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_LOAD82:%.*]] = load float, ptr [[DOTFCA_1_0_GEP81]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT83:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT80]], float [[DOTFCA_1_0_LOAD82]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_GEP84:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_LOAD85:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP84]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT86:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT83]], i32 [[DOTFCA_1_1_LOAD85]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_GEP87:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_LOAD88:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP87]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT89:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT86]], <3 x float> [[DOTFCA_2_LOAD88]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_GEP90:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_LOAD91:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP90]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT92:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT89]], <3 x float> [[DOTFCA_3_LOAD91]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_GEP93:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_LOAD94:%.*]] = load float, ptr [[DOTFCA_4_GEP93]], align 4 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT95:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT92]], float [[DOTFCA_4_LOAD94]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_GEP96:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_LOAD97:%.*]] = load i64, ptr [[DOTFCA_5_GEP96]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT98:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT95]], i64 [[DOTFCA_5_LOAD97]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT27:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP33]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT30:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT27]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT33:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT30]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT36:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT33]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT39:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT36]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT42:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT39]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT45:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT42]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT48:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT45]], i32 [[TMP34]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT51:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT48]], i32 [[TMP35]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT54:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT51]], i32 [[TMP36]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP42:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP43:%.*]] = add 
i32 [[TMP42]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP43]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP44]], i32 [[TMP45]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT98]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT54]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyIntersectionShader( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38]] !lgc.cps [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP0]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) 
+; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; DXILCONTPOSTPROCESS-CPS: anyhit.i: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] 
= insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT326:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShader.resume.0 to i64)) +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT326]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: accepthit.i: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0329_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast float 
[[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0329_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0329_0_VEC_INSERT]], float [[TMP9]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT328:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0329_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT288:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT328]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP10:%.*]], label [[TMP15:%.*]] +; DXILCONTPOSTPROCESS-CPS: 10: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT291:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT294:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT291]], <2 x float> [[DOTFCA_0_EXTRACT288]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT297:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT294]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT300:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT297]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT303:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT300]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT306:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT303]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT309:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT306]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT312:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT309]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 
+; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: 15: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT288]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT272:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT272]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT273]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT274]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 
[[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP17]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP18]], i32 [[TMP19]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META38]] !lgc.cps [[META42]] !continuation [[META43]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP6]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT280:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT282:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT284:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT286:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP8:%.*]], label [[TMP15:%.*]] +; DXILCONTPOSTPROCESS-CPS: 8: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT291:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT294:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT291]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_1_0_INSERT297:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT294]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT300:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT297]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT303:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT300]], <3 x float> [[DOTFCA_2_EXTRACT280]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT306:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT303]], <3 x float> [[DOTFCA_3_EXTRACT282]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT309:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT306]], float [[DOTFCA_4_EXTRACT284]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT312:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT309]], i64 [[DOTFCA_5_EXTRACT286]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 
[[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: 15: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP17]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT272:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT280]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT272]], <3 x float> [[DOTFCA_3_EXTRACT282]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT273]], float [[DOTFCA_4_EXTRACT284]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT274]], i64 [[DOTFCA_5_EXTRACT286]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[TMP19:%.*]] = add i32 [[TMP18]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyIntersectionShaderLargeAttrs( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38]] !lgc.cps [[META42]] !continuation [[META44:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP0]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP2]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP3]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: call 
void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; DXILCONTPOSTPROCESS-CPS: anyhit.i: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]] poison, i32 100, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_0_INSERT]], i32 101, 0, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_1_INSERT]], i32 102, 0, 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_2_INSERT]], i32 103, 0, 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(i64 ptrtoint (ptr @MyIntersectionShaderLargeAttrs.resume.0 to i64)) +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount !32 +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: accepthit.i: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 100 to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0334_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 101 to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0334_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0334_0_VEC_INSERT]], float [[TMP7]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT333:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0334_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT288:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT333]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP8:%.*]], label [[TMP13:%.*]] +; DXILCONTPOSTPROCESS-CPS: 8: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT291:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT294:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT291]], <2 x float> [[DOTFCA_0_EXTRACT288]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT297:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT294]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT300:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT297]], i32 
[[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT303:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT300]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT306:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT303]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT309:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT306]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT312:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT309]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 102, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 103, 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 104, 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 105, 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 106, 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP11]], i32 [[TMP12]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: 13: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT288]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT272:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT272]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT273]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT274]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 102, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 103, 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 104, 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 105, 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 106, 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META38]] !lgc.cps [[META42]] !continuation [[META44]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: entryresume.0: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT280:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT282:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT284:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT286:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP7]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; DXILCONTPOSTPROCESS-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP8:%.*]], label [[TMP15:%.*]] +; DXILCONTPOSTPROCESS-CPS: 8: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD6:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT291:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[DOTFCA_0_1_0_INSERT294:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT291]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT297:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT294]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT300:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT297]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT303:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT300]], <3 x float> [[DOTFCA_2_EXTRACT280]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT306:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT303]], <3 x float> [[DOTFCA_3_EXTRACT282]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT309:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT306]], float [[DOTFCA_4_EXTRACT284]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT312:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT309]], i64 [[DOTFCA_5_EXTRACT286]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 
[[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD6]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; DXILCONTPOSTPROCESS-CPS: 15: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP17]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT272:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT280]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT272]], <3 x float> [[DOTFCA_3_EXTRACT282]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT273]], float [[DOTFCA_4_EXTRACT284]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT274]], i64 [[DOTFCA_5_EXTRACT286]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: 
[[TMP19:%.*]] = add i32 [[TMP18]], -8 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; +; +; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyMissShader( +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42]] !lgc.cps [[META38]] !continuation [[META45:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: +; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 1, 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = 
extractelement <4 x float> , i32 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURN_ADDR]] to i64 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP10]], i32 [[TMP11]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable +; diff --git a/llvmraytracing/test/dx/unnamed-type-intrinsics.ll b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll index f59a8644e1..ecea4a3fb7 100644 --- a/llvmraytracing/test/dx/unnamed-type-intrinsics.ll +++ b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll @@ -346,7 +346,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( -; LOWERRAYTRACINGPIPELINE-SAME: [[TMP0:%.*]] [[TMP0]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META15:![0-9]+]] !continuation.entry [[META21:![0-9]+]] !continuation.registercount [[META15]] !continuation [[META22:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[TMP0:%.*]] [[TMP0]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META14:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META14]] !continuation [[META21:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP0]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) @@ -356,9 +356,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = bitcast ptr [[TMP4]] to ptr ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP5]]) #[[ATTR1:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], 
i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA23:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP6]], align 4, !tbaa [[TBAA22:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[TMP2]] undef, [[TMP0]] [[DIS_DATA_I]], 0 @@ -366,60 +366,54 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[TMP1]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i64 0 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP14]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store 
i32 [[TMP20]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_0_CALLER_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[TMP1]] [[TRAV_DATA2_I]]), !continuation.registercount [[META19:![0-9]+]], !continuation.returnedRegistercount !19 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[TMP0]] @await.(ptr [[TMP21]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[TMP1]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount !18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[TMP0]] [[AWAIT_:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP21]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP27]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP27]], i64 1 
-; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP27]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA23]] +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA22]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() ; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.createHandleForLib.dx.types.Handle(i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] @dx.op.annotateHandle(i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] -; LOWERRAYTRACINGPIPELINE-NEXT: ret void, !continuation.registercount [[META18:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret void, 
!continuation.registercount [[META17:![0-9]+]] ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %0 @MyClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: [[TMP2:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META26:![0-9]+]] !continuation.registercount [[META19]] !continuation [[META27:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: [[TMP2:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP2]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 @@ -428,29 +422,23 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP2]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP4]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], 
align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_0_I32S]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@registerbuffer.setpointerbarrier(ptr @PAYLOAD) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] @_cont_GetTriangleHitAttributes(ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP5]]) @@ -466,23 +454,19 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float 1.000000e+00, i64 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds 
[[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP33]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @registerbuffer.setpointerbarrier(ptr @PAYLOAD) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP35]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[TMP36]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP35]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP39]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT:%.*]], ptr @PAYLOAD, i32 0, i32 0, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[TMP39]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr getelementptr 
([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[TMP39]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr getelementptr ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_5_CLOSESTHIT_OUT]], ptr @PAYLOAD, i32 0, i32 0, i64 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[TMP2]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load [[TMP0]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret [[TMP0]] [[TMP47]], !continuation.registercount [[META19]] +; LOWERRAYTRACINGPIPELINE-NEXT: ret [[TMP0]] [[TMP47]], !continuation.registercount [[META18]] ; diff --git a/llvmraytracing/test/intrinsics/discard-values.ll b/llvmraytracing/test/intrinsics/discard-values.ll index 421489f008..83ec328c4a 100644 --- a/llvmraytracing/test/intrinsics/discard-values.ll +++ b/llvmraytracing/test/intrinsics/discard-values.ll @@ -16,7 +16,8 @@ declare !types !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define float @discard_f32() { ; CHECK-LABEL: define float @discard_f32() { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret float poison +; CHECK-NEXT: [[TMP0:%.*]] = freeze float poison +; CHECK-NEXT: ret float [[TMP0]] ; entry: %result = call float @_AmdGetUninitializedF32() @@ -26,7 +27,8 @@ entry: define i32 @discard_i32() { ; CHECK-LABEL: define i32 @discard_i32() { ; CHECK-NEXT: entry: -; 
CHECK-NEXT: ret i32 poison +; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 poison +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %result = call i32 @_AmdGetUninitializedI32() @@ -36,7 +38,8 @@ entry: define %struct.AnyHitData @discard_struct() { ; CHECK-LABEL: define %struct.AnyHitData @discard_struct() { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret [[STRUCT_ANYHITDATA:%.*]] poison +; CHECK-NEXT: [[TMP0:%.*]] = freeze [[STRUCT_ANYHITDATA:%.*]] poison +; CHECK-NEXT: ret [[STRUCT_ANYHITDATA]] [[TMP0]] ; entry: %result = call %struct.AnyHitData @_AmdGetUninitializedStruct() diff --git a/llvmraytracing/test/lgccps/alloca-select.ll b/llvmraytracing/test/lgccps/alloca-select.ll index 1980f014ec..6fc00840cf 100644 --- a/llvmraytracing/test/lgccps/alloca-select.ll +++ b/llvmraytracing/test/lgccps/alloca-select.ll @@ -26,23 +26,23 @@ declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test ; CHECK-SAME: ({} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], i32 [[ARG1:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) -; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 -; CHECK-NEXT: [[ARG1_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) +; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 +; CHECK-NEXT: [[ARG1_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 4 ; CHECK-NEXT: store i32 
[[ARG1]], ptr addrspace(32) [[ARG1_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 3 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG1]], 0 ; CHECK-NEXT: [[P:%.*]] = select i1 [[COND]], ptr addrspace(32) [[A1]], ptr addrspace(32) [[A2]] ; CHECK-NEXT: store i32 111, ptr addrspace(32) [[P]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[T0]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[T0]]) ; CHECK-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/lgccps/await-if-else.ll b/llvmraytracing/test/lgccps/await-if-else.ll index 57658fa5b7..331999eb9e 100644 --- a/llvmraytracing/test/lgccps/await-if-else.ll +++ b/llvmraytracing/test/lgccps/await-if-else.ll @@ -33,10 +33,10 @@ declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) @@ -44,14 +44,14 @@ declare void @lgc.cps.jump(...) ; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[T0]], 1.000000e+00 ; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[ARG]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[ARG]]) ; CHECK-NEXT: unreachable ; CHECK: bb2: -; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 [[CR2]] to ptr -; CHECK-NEXT: [[TMP4:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.1) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR2]], i32 2, {} poison, i32 [[TMP4]], float [[T0]]) +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[CR2]] to ptr +; CHECK-NEXT: [[TMP3:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.1) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR2]], i32 2, {} poison, i32 [[TMP3]], float [[T0]]) ; CHECK-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/lgccps/await-if.ll b/llvmraytracing/test/lgccps/await-if.ll index 9ea90a281d..e63a8b9ba8 100644 --- a/llvmraytracing/test/lgccps/await-if.ll +++ b/llvmraytracing/test/lgccps/await-if.ll @@ -29,19 +29,19 @@ declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: 
[[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) ; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[T0]], 1.000000e+00 ; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[ARG]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[ARG]]) ; CHECK-NEXT: unreachable ; CHECK: bb2: ; CHECK-NEXT: [[T0_BB2:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ] diff --git a/llvmraytracing/test/lgccps/await-in-loop.ll b/llvmraytracing/test/lgccps/await-in-loop.ll index 0de4d89173..e0e5e3d7ea 100644 --- a/llvmraytracing/test/lgccps/await-in-loop.ll +++ b/llvmraytracing/test/lgccps/await-in-loop.ll @@ -31,22 +31,22 @@ declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) -; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 20) +; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CHECK-NEXT: store float [[ARG2]], ptr addrspace(32) [[ARG2_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) -; CHECK-NEXT: [[CR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[CR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 3 ; CHECK-NEXT: store i32 [[CR]], ptr addrspace(32) [[CR_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[IND_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 
0, i32 4 +; CHECK-NEXT: [[IND_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 4 ; CHECK-NEXT: store i32 0, ptr addrspace(32) [[IND_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], i32 0) ; CHECK-NEXT: unreachable ; ; @@ -66,7 +66,7 @@ declare void @lgc.cps.jump(...) ; CHECK-NEXT: [[CR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 3 ; CHECK-NEXT: [[CR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[CR_RELOAD_ADDR]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[CR_RELOAD]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) ; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR_RELOAD]], i32 2, {} poison, i32 [[TMP6]], i32 [[INC_LOOP]]) ; CHECK-NEXT: unreachable ; CHECK: end: diff --git a/llvmraytracing/test/lgccps/cleanup-store-loads.ll b/llvmraytracing/test/lgccps/cleanup-store-loads.ll index 37afa18379..cef6961ff9 100644 --- a/llvmraytracing/test/lgccps/cleanup-store-loads.ll +++ b/llvmraytracing/test/lgccps/cleanup-store-loads.ll @@ -261,15 +261,15 @@ attributes #4 = { alwaysinline } ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 408) -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 408) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) ; CHECK-NEXT: [[COND:%.*]] = fcmp olt float [[T0]], 1.000000e+00 -; CHECK-NEXT: [[DATA:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[DATA:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CHECK-NEXT: [[VAL_I:%.*]] = call i32 @getVal32() ; 
CHECK-NEXT: store i32 [[VAL_I]], ptr addrspace(32) [[DATA]], align 2 ; CHECK-NEXT: [[VAL_I1:%.*]] = call i32 @getVal32() @@ -313,9 +313,9 @@ attributes #4 = { alwaysinline } ; CHECK-NEXT: store i32 [[VAL_I23]], ptr addrspace(32) [[ADDR_I24]], align 2 ; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[ARG]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[ARG]]) ; CHECK-NEXT: unreachable ; CHECK: bb2: ; CHECK-NEXT: [[T0_BB2:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ] diff --git a/llvmraytracing/test/lgccps/entry-point-with-cps.ll b/llvmraytracing/test/lgccps/entry-point-with-cps.ll index e65f269e35..7b181539d8 100644 --- a/llvmraytracing/test/lgccps/entry-point-with-cps.ll +++ b/llvmraytracing/test/lgccps/entry-point-with-cps.ll @@ -75,7 +75,7 @@ declare void @lgc.cps.jump(...) ; CHECK-NEXT: [[CR_0:%.*]] = ptrtoint ptr [[FN]] to i32 ; CHECK-NEXT: [[CR_1:%.*]] = or i32 [[CR_0]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR_1]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @raygen.resume.0) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @raygen.resume.0) ; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR_1]], i32 4, {} poison, i32 [[TMP1]], i32 [[X]], ptr addrspace(1) [[DST]]) ; CHECK-NEXT: unreachable ; @@ -93,16 +93,16 @@ declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define spir_func void @chs( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], i32 [[X:%.*]]) !lgc.shaderstage [[META0]] !lgc.cps [[META3:![0-9]+]] !continuation [[META4:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CHS_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CHS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[PUSHCONST:%.*]] = call ptr addrspace(4) @lgc.user.data(i32 24) ; CHECK-NEXT: [[FN:%.*]] = load ptr, ptr addrspace(4) [[PUSHCONST]], align 8 ; CHECK-NEXT: [[CR_0:%.*]] = ptrtoint ptr [[FN]] to i32 ; CHECK-NEXT: [[CR_1:%.*]] = or i32 [[CR_0]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR_1]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @chs.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR_1]], i32 2, {} poison, i32 [[TMP2]], i32 [[X]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR_1]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @chs.resume.0) +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR_1]], i32 2, {} poison, i32 [[TMP1]], i32 [[X]]) ; CHECK-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/lgccps/lower-traversal.ll b/llvmraytracing/test/lgccps/lower-traversal.ll new file mode 100644 index 0000000000..689d6beb69 --- /dev/null +++ b/llvmraytracing/test/lgccps/lower-traversal.ll @@ -0,0 +1,648 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s 2> %t0.stderr | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s +; RUN: count 0 < %t0.stderr + +%struct.AnyHitTraversalData = type { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } +%struct.DispatchSystemData = type { i32 } + +; Need _cont_ReportHit to get system data type +declare !types !6 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + +declare !types !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +declare i64 @_AmdGetCurrentFuncAddr() + +define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal(ptr addrspace(5) %0) local_unnamed_addr !lgc.shaderstage !0 !types !1 !lgc.rt.shaderstage !3 !lgc.rt.attribute.size !4 { +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define dso_local spir_func void @_cont_Traversal( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [7 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.rt.attribute.size [[META6:![0-9]+]] !lgc.cps [[META7:![0-9]+]] 
!continuation [[META8:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: .entry: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, align 16, addrspace(5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA]], ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 93, i32 17, i32 0, i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(5) [[TMP5]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr 
addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(5) [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFR539:%.*]] = freeze i32 [[TMP10]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load <3 x float>, ptr addrspace(5) [[TMP11]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load <3 x float>, ptr addrspace(5) [[TMP13]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load float, ptr addrspace(5) [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 
}, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load float, ptr addrspace(5) [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load float, ptr addrspace(5) [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(5) [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP22]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(5) [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 0, i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, 
<3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(5) [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load <2 x float>, ptr addrspace(5) [[TMP29]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(5) [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(5) [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(5) [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x 
float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(5) [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(5) [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(5) [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2, i32 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = load i64, ptr addrspace(5) [[TMP43]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = icmp ugt i32 [[DOTFR]], -3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP45]], label [[TMP46:%.*]], label [[TMP67:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 46: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = load i32, ptr 
addrspace(7) [[TMP47]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(7) [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = zext i32 [[TMP50]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = shl nuw i64 [[TMP51]], 32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = zext i32 [[TMP48]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = or i64 [[TMP52]], [[TMP53]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFR541:%.*]] = freeze i64 [[TMP54]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = icmp eq i64 [[DOTFR541]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[TMP55]], label [[DOTEXIT2:%.*]], label [[TMP56:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 56: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = lshr i32 [[TMP8]], 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(7) [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = mul i32 [[TMP59]], [[TMP57]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = zext i32 [[TMP60]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = add i64 [[DOTFR541]], [[TMP61]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = inttoptr i64 [[TMP62]] to ptr addrspace(4) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(4) [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = freeze i32 [[TMP64]] +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTEXIT2]] +; LOWERRAYTRACINGPIPELINE-CPS: .exit2: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP57]], [[TMP56]] ], [ undef, [[TMP46]] ] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0128_0_EXTRACT_TRUNC:%.*]] = phi i32 [ [[TMP65]], [[TMP56]] ], [ 0, [[TMP46]] ] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTNOT542:%.*]] = icmp eq i32 [[DOTSROA_0128_0_EXTRACT_TRUNC]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[DOTNOT542]], label [[TMP106:%.*]], label [[TMP66:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 66: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0130_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> [[TMP2]], 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_0_INSERT]], i32 [[DOT0]], 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_1_INSERT]], i64 [[TMP6]], 1, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_0_INSERT]], i32 [[TMP8]], 1, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x 
float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_1_INSERT]], i32 [[DOTFR539]], 1, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_2_INSERT]], <3 x float> [[TMP12]], 1, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_4_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_3_INSERT]], <3 x float> [[TMP14]], 1, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_5_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_4_INSERT]], float [[TMP16]], 1, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_6_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_5_INSERT]], float [[TMP18]], 1, 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_0_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_6_INSERT]], float [[TMP20]], 2, 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_1_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_0_INSERT]], i32 [[DOTFR]], 2, 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_2_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { 
float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_1_INSERT]], i32 [[TMP24]], 2, 0, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_3_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_2_INSERT]], i32 [[TMP26]], 2, 0, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_4_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_3_INSERT]], i32 [[TMP28]], 2, 0, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_1_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_4_INSERT]], <2 x float> [[TMP30]], 2, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_2_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_1_INSERT]], i32 [[TMP32]], 2, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_3_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_2_INSERT]], i32 [[TMP34]], 2, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_4_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_3_INSERT]], i32 [[TMP36]], 2, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_5_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x 
float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_4_INSERT]], i32 [[TMP38]], 2, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_6_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_5_INSERT]], i32 [[TMP40]], 2, 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_7_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_6_INSERT]], i32 [[TMP42]], 2, 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_8_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_7_INSERT]], i64 [[TMP44]], 2, 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[DOTSROA_0128_0_EXTRACT_TRUNC]], i32 -1, {} poison, i32 [[DOTSROA_0130_0_EXTRACT_TRUNC]], i32 [[DOT0]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_8_INSERT]], [7 x i32] poison, [8 x i32] [[PAYLOAD]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 67: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = shl i32 [[DOTFR]], 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP69:%.*]] = and i32 [[TMP68]], -64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP71:%.*]] = add i64 [[TMP6]], [[TMP70]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], 48 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP73:%.*]] = inttoptr i64 [[TMP72]] to ptr addrspace(1) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP73]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOT4_VEC_EXTRACT452:%.*]] = extractelement <4 x i32> [[TMP74]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = and i32 [[TMP26]], 16777215 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP76:%.*]] = and i32 [[DOT4_VEC_EXTRACT452]], 16777215 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP77:%.*]] = lshr i32 [[TMP8]], 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP78:%.*]] = and i32 [[TMP77]], 15 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = lshr i32 [[TMP8]], 12 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = and i32 [[TMP79]], 15 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = mul nuw nsw i32 [[TMP80]], [[TMP75]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP82:%.*]] = add nuw nsw i32 [[TMP78]], [[TMP81]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP83:%.*]] = add nuw nsw i32 [[TMP82]], [[TMP76]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP84:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 9 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(7) [[TMP84]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 10 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(7) [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = zext i32 [[TMP87]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP89:%.*]] = shl nuw i64 [[TMP88]], 32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = zext i32 [[TMP85]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP91:%.*]] = or i64 [[TMP89]], [[TMP90]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFR537:%.*]] = freeze i64 [[TMP91]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[DOTFR537]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[DOTNOT]], label [[DOTEXIT5:%.*]], label [[TMP92:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 92: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP93:%.*]] = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) [[TMP0]], i32 0, i32 11 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(7) [[TMP93]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = mul i32 [[TMP94]], [[TMP83]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = zext i32 [[TMP95]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = add i64 [[DOTFR537]], [[TMP96]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = inttoptr i64 [[TMP97]] to ptr addrspace(1) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[TMP99:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP98]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP100:%.*]] = shufflevector <4 x i32> [[TMP99]], <4 x i32> poison, <2 x i32> +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP101:%.*]] = freeze <2 x i32> [[TMP100]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTEXIT5]] +; LOWERRAYTRACINGPIPELINE-CPS: .exit5: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0501_0:%.*]] = phi <2 x i32> [ [[TMP101]], [[TMP92]] ], [ zeroinitializer, [[TMP67]] ] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP102:%.*]] = and i32 [[DOTFR539]], 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0150_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[DOTSROA_0501_0]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTNOT540:%.*]] = icmp eq i32 [[DOTSROA_0150_0_VEC_EXTRACT]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[OR_COND:%.*]] = or i1 [[TMP103]], [[DOTNOT540]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[OR_COND]], label [[TMP106]], label [[TMP104:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 104: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0320_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP105]] to i32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_0_INSERT322:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> [[TMP2]], 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_INSERT323:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_0_INSERT322]], i32 [[TMP83]], 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_0_INSERT324:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_1_INSERT323]], i64 [[TMP6]], 1, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_1_INSERT325:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_0_INSERT324]], i32 [[TMP8]], 1, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_2_INSERT326:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_1_INSERT325]], i32 [[DOTFR539]], 1, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_3_INSERT327:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_2_INSERT326]], <3 x float> [[TMP12]], 1, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_4_INSERT328:%.*]] = insertvalue { { <3 x 
i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_3_INSERT327]], <3 x float> [[TMP14]], 1, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_5_INSERT329:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_4_INSERT328]], float [[TMP16]], 1, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_6_INSERT330:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_5_INSERT329]], float [[TMP18]], 1, 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_0_INSERT331:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_1_6_INSERT330]], float [[TMP20]], 2, 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_1_INSERT332:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_0_INSERT331]], i32 [[DOTFR]], 2, 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_2_INSERT333:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_1_INSERT332]], i32 [[TMP24]], 2, 0, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_3_INSERT334:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_2_INSERT333]], i32 [[TMP26]], 2, 0, 3 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_0_4_INSERT335:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_3_INSERT334]], i32 [[TMP28]], 2, 0, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_1_INSERT336:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_0_4_INSERT335]], <2 x float> [[TMP30]], 2, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_2_INSERT337:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_1_INSERT336]], i32 [[TMP32]], 2, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_3_INSERT338:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_2_INSERT337]], i32 [[TMP34]], 2, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_4_INSERT339:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_3_INSERT338]], i32 [[TMP36]], 2, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_5_INSERT340:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_4_INSERT339]], i32 [[TMP38]], 2, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_6_INSERT341:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, 
i64 } } [[DOTFCA_2_5_INSERT340]], i32 [[TMP40]], 2, 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_7_INSERT342:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_6_INSERT341]], i32 [[TMP42]], 2, 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_2_8_INSERT343:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_7_INSERT342]], i64 [[TMP44]], 2, 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[DOTSROA_0150_0_VEC_EXTRACT]], i32 -1, {} poison, i32 [[DOTSROA_0320_0_EXTRACT_TRUNC]], i32 [[TMP83]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_8_INSERT343]], [7 x i32] poison, [8 x i32] [[PAYLOAD]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: 106: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_7_0:%.*]] = phi i32 [ [[TMP4]], [[DOTEXIT2]] ], [ [[TMP83]], [[DOTEXIT5]] ] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0373_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <3 x i32>, i32 } poison, <3 x i32> [[TMP2]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <3 x i32>, i32 } [[DOTFCA_0_INSERT]], i32 [[DOTSROA_7_0]], 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[DOTSROA_0373_0_EXTRACT_TRUNC]], i32 -1, {} poison, i32 poison, i32 [[DOTSROA_7_0]], { <3 x i32>, i32 } [[DOTFCA_1_INSERT]], [34 x i32] poison, [8 x i32] [[PAYLOAD]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +.entry: + %1 = call ptr addrspace(7) @lgc.load.buffer.desc(i64 93, i32 17, i32 0, i32 0) + %2 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %1) + %3 = load <3 x i32>, ptr addrspace(5) %0, align 16 + %4 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 0, i32 1 + %5 = load i32, ptr addrspace(5) %4, align 4 + %6 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 0 + %7 = load i64, ptr addrspace(5) %6, align 8 + %8 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 1 + %9 = load i32, ptr addrspace(5) %8, align 4 + %10 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 2 + %11 = load i32, ptr addrspace(5) %10, align 4 + %.fr539 = freeze i32 %11 + %12 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 3 + %13 = load <3 x float>, ptr addrspace(5) %12, align 16 + %14 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, 
<2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 4 + %15 = load <3 x float>, ptr addrspace(5) %14, align 16 + %16 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 5 + %17 = load float, ptr addrspace(5) %16, align 4 + %18 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 1, i32 6 + %19 = load float, ptr addrspace(5) %18, align 4 + %20 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 0, i32 0 + %21 = load float, ptr addrspace(5) %20, align 4 + %22 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 0, i32 1 + %23 = load i32, ptr addrspace(5) %22, align 4 + %.fr = freeze i32 %23 + %24 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 0, i32 2 + %25 = load i32, ptr addrspace(5) %24, align 4 + %26 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 0, i32 3 + %27 = load i32, ptr addrspace(5) %26, align 4 + %28 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, 
i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 0, i32 4 + %29 = load i32, ptr addrspace(5) %28, align 4 + %30 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 1 + %31 = load <2 x float>, ptr addrspace(5) %30, align 8 + %32 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 2 + %33 = load i32, ptr addrspace(5) %32, align 4 + %34 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 3 + %35 = load i32, ptr addrspace(5) %34, align 4 + %36 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 4 + %37 = load i32, ptr addrspace(5) %36, align 4 + %38 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 5 + %39 = load i32, ptr addrspace(5) %38, align 4 + %40 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 6 + %41 = load i32, ptr addrspace(5) %40, align 4 + %42 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, 
i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 7 + %43 = load i32, ptr addrspace(5) %42, align 4 + %44 = getelementptr { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, ptr addrspace(5) %0, i32 0, i32 2, i32 8 + %45 = load i64, ptr addrspace(5) %44, align 8 + %46 = icmp ugt i32 %.fr, -3 + br i1 %46, label %47, label %68 + +47: ; preds = %.entry + %48 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 5 + %49 = load i32, ptr addrspace(7) %48, align 4 + %50 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 6 + %51 = load i32, ptr addrspace(7) %50, align 4 + %52 = zext i32 %51 to i64 + %53 = shl nuw i64 %52, 32 + %54 = zext i32 %49 to i64 + %55 = or i64 %53, %54 + %.fr541 = freeze i64 %55 + %56 = icmp eq i64 %.fr541, 0 + br i1 %56, label %.exit2, label %57 + +57: ; preds = %47 + %58 = lshr i32 %9, 16 + %59 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 7 + %60 = load i32, ptr addrspace(7) %59, align 4 + %61 = mul i32 %60, %58 + %62 = zext i32 %61 to i64 + %63 = add i64 %.fr541, %62 + %64 = inttoptr i64 %63 to ptr addrspace(4) + %65 = load i32, ptr addrspace(4) %64, align 4 + %66 = freeze i32 %65 + br label %.exit2 + +.exit2: ; preds = %47, %57 + %.0 = phi i32 [ %58, %57 ], [ undef, %47 ] + %.sroa.0128.0.extract.trunc = phi i32 [ %66, %57 ], [ 0, %47 ] + %.not542 = icmp eq i32 %.sroa.0128.0.extract.trunc, 0 + br i1 %.not542, label %106, label %67 + +67: ; preds = 
%.exit2 + %.sroa.0130.0.extract.trunc = trunc i64 %45 to i32 + %.fca.0.0.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> %3, 0, 0 + %.fca.0.1.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.0.0.insert, i32 %.0, 0, 1 + %.fca.1.0.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.0.1.insert, i64 %7, 1, 0 + %.fca.1.1.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.0.insert, i32 %9, 1, 1 + %.fca.1.2.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.1.insert, i32 %.fr539, 1, 2 + %.fca.1.3.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.2.insert, <3 x float> %13, 1, 3 + %.fca.1.4.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.3.insert, <3 x float> %15, 1, 4 + %.fca.1.5.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.4.insert, float %17, 1, 5 + %.fca.1.6.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x 
float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.5.insert, float %19, 1, 6 + %.fca.2.0.0.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.6.insert, float %21, 2, 0, 0 + %.fca.2.0.1.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.0.insert, i32 %.fr, 2, 0, 1 + %.fca.2.0.2.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.1.insert, i32 %25, 2, 0, 2 + %.fca.2.0.3.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.2.insert, i32 %27, 2, 0, 3 + %.fca.2.0.4.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.3.insert, i32 %29, 2, 0, 4 + %.fca.2.1.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.4.insert, <2 x float> %31, 2, 1 + %.fca.2.2.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.1.insert, i32 %33, 2, 2 + %.fca.2.3.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, 
i32, i64 } } %.fca.2.2.insert, i32 %35, 2, 3 + %.fca.2.4.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.3.insert, i32 %37, 2, 4 + %.fca.2.5.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.4.insert, i32 %39, 2, 5 + %.fca.2.6.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.5.insert, i32 %41, 2, 6 + %.fca.2.7.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.6.insert, i32 %43, 2, 7 + %.fca.2.8.insert = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.7.insert, i64 %45, 2, 8 + call void (...) 
@lgc.cps.jump(i32 %.sroa.0128.0.extract.trunc, i32 -1, {} poison, i32 %.sroa.0130.0.extract.trunc, i32 %.0, { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.8.insert) + unreachable + +68: ; preds = %.entry + %69 = shl i32 %.fr, 3 + %70 = and i32 %69, -64 + %71 = zext i32 %70 to i64 + %72 = add i64 %7, %71 + %73 = add i64 %72, 48 + %74 = inttoptr i64 %73 to ptr addrspace(1) + %75 = load <4 x i32>, ptr addrspace(1) %74, align 16 + %.4.vec.extract452 = extractelement <4 x i32> %75, i64 1 + %76 = and i32 %27, 16777215 + %77 = and i32 %.4.vec.extract452, 16777215 + %78 = lshr i32 %9, 8 + %79 = and i32 %78, 15 + %80 = lshr i32 %9, 12 + %81 = and i32 %80, 15 + %82 = mul nuw nsw i32 %81, %76 + %83 = add nuw nsw i32 %79, %82 + %84 = add nuw nsw i32 %83, %77 + %85 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 9 + %86 = load i32, ptr addrspace(7) %85, align 4 + %87 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 10 + %88 = load i32, ptr addrspace(7) %87, align 4 + %89 = zext i32 %88 to i64 + %90 = shl nuw i64 %89, 32 + %91 = zext i32 %86 to i64 + %92 = or i64 %90, %91 + %.fr537 = freeze i64 %92 + %.not = icmp eq i64 %.fr537, 0 + br i1 %.not, label %.exit5, label %93 + +93: ; preds = %68 + %94 = getelementptr inbounds <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>, ptr addrspace(7) %1, i32 0, i32 11 + %95 = load i32, ptr addrspace(7) %94, align 4 + %96 = mul i32 %95, %84 + %97 = zext i32 %96 to i64 + %98 = add i64 %.fr537, 
%97 + %99 = inttoptr i64 %98 to ptr addrspace(1) + %100 = load <4 x i32>, ptr addrspace(1) %99, align 16 + %101 = shufflevector <4 x i32> %100, <4 x i32> poison, <2 x i32> + %102 = freeze <2 x i32> %101 + br label %.exit5 + +.exit5: ; preds = %93, %68 + %.sroa.0501.0 = phi <2 x i32> [ %102, %93 ], [ zeroinitializer, %68 ] + %103 = and i32 %.fr539, 8 + %104 = icmp ne i32 %103, 0 + %.sroa.0150.0.vec.extract = extractelement <2 x i32> %.sroa.0501.0, i64 0 + %.not540 = icmp eq i32 %.sroa.0150.0.vec.extract, 0 + %or.cond = or i1 %104, %.not540 + br i1 %or.cond, label %106, label %105 + +105: ; preds = %.exit5 + %addr = call i64 @_AmdGetCurrentFuncAddr() + %.sroa.0320.0.extract.trunc = trunc i64 %addr to i32 + %.fca.0.0.insert322 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> %3, 0, 0 + %.fca.0.1.insert323 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.0.0.insert322, i32 %84, 0, 1 + %.fca.1.0.insert324 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.0.1.insert323, i64 %7, 1, 0 + %.fca.1.1.insert325 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.0.insert324, i32 %9, 1, 1 + %.fca.1.2.insert326 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.1.insert325, i32 %.fr539, 1, 2 + %.fca.1.3.insert327 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float 
}, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.2.insert326, <3 x float> %13, 1, 3 + %.fca.1.4.insert328 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.3.insert327, <3 x float> %15, 1, 4 + %.fca.1.5.insert329 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.4.insert328, float %17, 1, 5 + %.fca.1.6.insert330 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.5.insert329, float %19, 1, 6 + %.fca.2.0.0.insert331 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.1.6.insert330, float %21, 2, 0, 0 + %.fca.2.0.1.insert332 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.0.insert331, i32 %.fr, 2, 0, 1 + %.fca.2.0.2.insert333 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.1.insert332, i32 %25, 2, 0, 2 + %.fca.2.0.3.insert334 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.2.insert333, i32 %27, 2, 0, 3 + %.fca.2.0.4.insert335 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, 
i32, i32, i32, i32, i64 } } %.fca.2.0.3.insert334, i32 %29, 2, 0, 4 + %.fca.2.1.insert336 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.0.4.insert335, <2 x float> %31, 2, 1 + %.fca.2.2.insert337 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.1.insert336, i32 %33, 2, 2 + %.fca.2.3.insert338 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.2.insert337, i32 %35, 2, 3 + %.fca.2.4.insert339 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.3.insert338, i32 %37, 2, 4 + %.fca.2.5.insert340 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.4.insert339, i32 %39, 2, 5 + %.fca.2.6.insert341 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.5.insert340, i32 %41, 2, 6 + %.fca.2.7.insert342 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.6.insert341, i32 %43, 2, 7 + %.fca.2.8.insert343 = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.7.insert342, i64 %45, 2, 8 + call void (...) 
@lgc.cps.jump(i32 %.sroa.0150.0.vec.extract, i32 -1, {} poison, i32 %.sroa.0320.0.extract.trunc, i32 %84, { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } %.fca.2.8.insert343) + unreachable + +106: ; preds = %.exit5, %.exit2 + %.sroa.7.0 = phi i32 [ %5, %.exit2 ], [ %84, %.exit5 ] + %.sroa.0373.0.extract.trunc = trunc i64 %45 to i32 + %.fca.0.insert = insertvalue { <3 x i32>, i32 } poison, <3 x i32> %3, 0 + %.fca.1.insert = insertvalue { <3 x i32>, i32 } %.fca.0.insert, i32 %.sroa.7.0, 1 + call void (...) @lgc.cps.jump(i32 %.sroa.0373.0.extract.trunc, i32 -1, {} poison, i32 poison, i32 %.sroa.7.0, { <3 x i32>, i32 } %.fca.1.insert) + unreachable +} + +; Set !lgc.rt.attribute.size to 0 to test padding is added correctly for _AmdEnqueueAnyHit (should be poison {}) +define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal_2(ptr addrspace(5) %0) local_unnamed_addr !lgc.shaderstage !0 !types !1 !lgc.rt.shaderstage !3 !lgc.rt.attribute.size !8 { +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define dso_local spir_func void @_cont_Traversal_2( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [5 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4]] !lgc.rt.shaderstage [[META5]] !lgc.rt.attribute.size [[META9:![0-9]+]] !lgc.cps [[META7]] !continuation [[META10:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: .entry: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, 
i32, i32, i32, i32, i64 } }, align 16, addrspace(5) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA]], ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 93, i32 17, i32 0, i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(5) [[TMP3]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load <3 x float>, ptr addrspace(5) [[TMP7]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 48 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = load <3 x float>, ptr addrspace(5) [[TMP9]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load float, ptr addrspace(5) [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 68 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load float, ptr addrspace(5) [[TMP13]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 80 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load float, ptr addrspace(5) [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 84 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(5) [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 88 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(5) [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 92 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(5) [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 96 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(5) [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 104 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr addrspace(5) [[TMP25]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 112 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(5) [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 116 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(5) [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 120 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(5) [[TMP31]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 124 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(5) [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 128 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(5) [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 132 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(5) [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 136 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(5) [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], i32 144 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load i64, ptr addrspace(5) [[TMP41]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = and i64 [[TMP4]], 281474976710655 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = add nuw nsw i64 [[TMP43]], 48 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP44]] to ptr addrspace(1) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP45]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOT4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP46]], i64 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP47:%.*]] = and i32 [[DOT4_VEC_EXTRACT]], 16777215 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP48:%.*]] = lshr i32 [[TMP6]], 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP50:%.*]] = add nuw nsw i32 [[TMP49]], [[TMP47]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(7) 
[[TMP0]], i32 36 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(7) [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 40 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(7) [[TMP53]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = zext i32 [[TMP54]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = zext i32 [[TMP52]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = or i64 [[TMP56]], [[TMP57]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFR:%.*]] = freeze i64 [[TMP58]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[DOTFR]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[DOTNOT]], label [[DOTEXIT4:%.*]], label [[TMP59:%.*]] +; LOWERRAYTRACINGPIPELINE-CPS: 59: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 44 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(7) [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP50]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = zext i32 [[TMP62]] to i64 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = add i64 [[DOTFR]], [[TMP63]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = inttoptr i64 [[TMP64]] to ptr addrspace(1) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP65]], align 16 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = shufflevector <4 x i32> [[TMP66]], <4 x i32> poison, <2 x i32> +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTEXIT4]] +; LOWERRAYTRACINGPIPELINE-CPS: .exit4: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_5334_0:%.*]] = phi <2 x i32> [ [[TMP67]], [[TMP59]] ], [ zeroinitializer, [[DOTENTRY:%.*]] ] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: 
[[DOTSROA_370_8_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[DOTSROA_5334_0]], i64 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP68:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal_2) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTSROA_0112_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP68]] to i32 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } poison, i32 [[TMP2]], 0, 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_0_1_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_0_0_INSERT]], i32 [[TMP50]], 0, 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_0_1_INSERT]], i64 [[TMP4]], 0, 1, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_0_INSERT]], i32 [[TMP6]], 0, 1, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_1_INSERT]], <3 x float> [[TMP8]], 0, 1, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue { { { 
i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_2_INSERT]], <3 x float> [[TMP10]], 0, 1, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_4_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_3_INSERT]], float [[TMP12]], 0, 1, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_1_5_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_4_INSERT]], float [[TMP14]], 0, 1, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_0_0_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_1_5_INSERT]], float [[TMP16]], 0, 2, 0, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_0_1_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_0_0_INSERT]], i32 [[TMP18]], 0, 2, 0, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_0_2_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_0_1_INSERT]], i32 [[TMP20]], 0, 2, 0, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_0_3_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, 
<3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_0_2_INSERT]], i32 [[TMP22]], 0, 2, 0, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_0_4_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_0_3_INSERT]], i32 [[TMP24]], 0, 2, 0, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_1_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_0_4_INSERT]], <2 x float> [[TMP26]], 0, 2, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_2_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_1_INSERT]], i32 [[TMP28]], 0, 2, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_3_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_2_INSERT]], i32 [[TMP30]], 0, 2, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_4_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_3_INSERT]], i32 [[TMP32]], 0, 2, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_5_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, 
i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_4_INSERT]], i32 [[TMP34]], 0, 2, 5 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_6_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_5_INSERT]], i32 [[TMP36]], 0, 2, 6 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_7_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_6_INSERT]], i32 [[TMP38]], 0, 2, 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_8_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_7_INSERT]], i32 [[TMP40]], 0, 2, 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_0_2_9_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_8_INSERT]], i64 [[TMP42]], 0, 2, 9 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_0_2_9_INSERT]], float 0.000000e+00, 1, 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { 
float, i32, i32, i32, i32 } } [[DOTFCA_1_0_INSERT]], i32 0, 1, 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_1_1_INSERT]], i32 0, 1, 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_1_2_INSERT]], i32 0, 1, 3 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DOTFCA_1_4_INSERT:%.*]] = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_1_3_INSERT]], i32 0, 1, 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[DOTSROA_370_8_VEC_EXTRACT]], i32 -1, {} poison, i32 [[DOTSROA_0112_0_EXTRACT_TRUNC]], i32 [[TMP50]], { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } [[DOTFCA_1_4_INSERT]], <2 x float> zeroinitializer, {} poison, [8 x i32] [[PAYLOAD]]) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +.entry: + %1 = call ptr addrspace(7) @lgc.load.buffer.desc(i64 93, i32 17, i32 0, i32 0) + %2 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %1) + %3 = load i32, ptr addrspace(5) %0, align 4 + %4 = getelementptr i8, ptr addrspace(5) %0, i32 16 + %5 = load i64, ptr addrspace(5) %4, align 8 + %6 = getelementptr i8, ptr addrspace(5) %0, i32 24 + %7 = load i32, ptr addrspace(5) %6, align 4 + %8 = getelementptr i8, ptr addrspace(5) %0, i32 32 + %9 = load <3 x float>, ptr addrspace(5) %8, align 16 + %10 = getelementptr i8, ptr addrspace(5) %0, i32 48 + %11 = load <3 x float>, ptr addrspace(5) %10, align 16 + %12 = getelementptr i8, ptr addrspace(5) %0, i32 64 + %13 = load float, ptr addrspace(5) %12, align 4 + %14 = getelementptr i8, ptr addrspace(5) %0, i32 68 + %15 = load float, ptr addrspace(5) %14, align 4 + %16 = getelementptr i8, ptr addrspace(5) %0, i32 80 + %17 = load float, ptr addrspace(5) %16, align 4 + %18 = getelementptr i8, ptr addrspace(5) %0, i32 84 + %19 = load i32, ptr addrspace(5) %18, align 4 + %20 = getelementptr i8, ptr addrspace(5) %0, i32 88 + %21 = load i32, ptr addrspace(5) %20, align 4 + %22 = getelementptr i8, ptr addrspace(5) %0, i32 92 + %23 = load i32, ptr addrspace(5) %22, align 4 + %24 = getelementptr i8, ptr addrspace(5) %0, i32 96 + %25 = load i32, ptr addrspace(5) %24, align 4 + %26 = getelementptr i8, ptr addrspace(5) %0, i32 104 + %27 = load <2 x float>, ptr addrspace(5) %26, align 8 + %28 = getelementptr i8, ptr addrspace(5) %0, i32 112 + %29 = load i32, ptr addrspace(5) 
%28, align 4 + %30 = getelementptr i8, ptr addrspace(5) %0, i32 116 + %31 = load i32, ptr addrspace(5) %30, align 4 + %32 = getelementptr i8, ptr addrspace(5) %0, i32 120 + %33 = load i32, ptr addrspace(5) %32, align 4 + %34 = getelementptr i8, ptr addrspace(5) %0, i32 124 + %35 = load i32, ptr addrspace(5) %34, align 4 + %36 = getelementptr i8, ptr addrspace(5) %0, i32 128 + %37 = load i32, ptr addrspace(5) %36, align 4 + %38 = getelementptr i8, ptr addrspace(5) %0, i32 132 + %39 = load i32, ptr addrspace(5) %38, align 4 + %40 = getelementptr i8, ptr addrspace(5) %0, i32 136 + %41 = load i32, ptr addrspace(5) %40, align 4 + %42 = getelementptr i8, ptr addrspace(5) %0, i32 144 + %43 = load i64, ptr addrspace(5) %42, align 8 + %44 = and i64 %5, 281474976710655 + %45 = add nuw nsw i64 %44, 48 + %46 = inttoptr i64 %45 to ptr addrspace(1) + %47 = load <4 x i32>, ptr addrspace(1) %46, align 16 + %.4.vec.extract = extractelement <4 x i32> %47, i64 1 + %48 = and i32 %.4.vec.extract, 16777215 + %49 = lshr i32 %7, 8 + %50 = and i32 %49, 15 + %51 = add nuw nsw i32 %50, %48 + %52 = getelementptr inbounds i8, ptr addrspace(7) %1, i32 36 + %53 = load i32, ptr addrspace(7) %52, align 4 + %54 = getelementptr inbounds i8, ptr addrspace(7) %1, i32 40 + %55 = load i32, ptr addrspace(7) %54, align 4 + %56 = zext i32 %55 to i64 + %57 = shl nuw i64 %56, 32 + %58 = zext i32 %53 to i64 + %59 = or i64 %57, %58 + %.fr = freeze i64 %59 + %.not = icmp eq i64 %.fr, 0 + br i1 %.not, label %.exit4, label %60 + +60: ; preds = %.entry + %61 = getelementptr inbounds i8, ptr addrspace(7) %1, i32 44 + %62 = load i32, ptr addrspace(7) %61, align 4 + %63 = mul i32 %62, %51 + %64 = zext i32 %63 to i64 + %65 = add i64 %.fr, %64 + %66 = inttoptr i64 %65 to ptr addrspace(1) + %67 = load <4 x i32>, ptr addrspace(1) %66, align 16 + %68 = shufflevector <4 x i32> %67, <4 x i32> poison, <2 x i32> + br label %.exit4 + +.exit4: ; preds = %.entry, %60 + %.sroa.5334.0 = phi <2 x i32> [ %68, %60 ], [ 
zeroinitializer, %.entry ] + %.sroa.370.8.vec.extract = extractelement <2 x i32> %.sroa.5334.0, i64 0 + %69 = call spir_func i64 @_AmdGetCurrentFuncAddr() + %.sroa.0112.0.extract.trunc = trunc i64 %69 to i32 + %.fca.0.0.0.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } poison, i32 %3, 0, 0, 0 + %.fca.0.0.1.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.0.0.insert, i32 %51, 0, 0, 1 + %.fca.0.1.0.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.0.1.insert, i64 %5, 0, 1, 0 + %.fca.0.1.1.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.1.0.insert, i32 %7, 0, 1, 1 + %.fca.0.1.2.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.1.1.insert, <3 x float> %9, 0, 1, 2 + %.fca.0.1.3.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.1.2.insert, <3 x float> %11, 0, 1, 3 + %.fca.0.1.4.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } 
}, { float, i32, i32, i32, i32 } } %.fca.0.1.3.insert, float %13, 0, 1, 4 + %.fca.0.1.5.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.1.4.insert, float %15, 0, 1, 5 + %.fca.0.2.0.0.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.1.5.insert, float %17, 0, 2, 0, 0 + %.fca.0.2.0.1.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.0.0.insert, i32 %19, 0, 2, 0, 1 + %.fca.0.2.0.2.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.0.1.insert, i32 %21, 0, 2, 0, 2 + %.fca.0.2.0.3.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.0.2.insert, i32 %23, 0, 2, 0, 3 + %.fca.0.2.0.4.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.0.3.insert, i32 %25, 0, 2, 0, 4 + %.fca.0.2.1.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.0.4.insert, <2 x float> %27, 0, 2, 1 + 
%.fca.0.2.2.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.1.insert, i32 %29, 0, 2, 2 + %.fca.0.2.3.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.2.insert, i32 %31, 0, 2, 3 + %.fca.0.2.4.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.3.insert, i32 %33, 0, 2, 4 + %.fca.0.2.5.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.4.insert, i32 %35, 0, 2, 5 + %.fca.0.2.6.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.5.insert, i32 %37, 0, 2, 6 + %.fca.0.2.7.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.6.insert, i32 %39, 0, 2, 7 + %.fca.0.2.8.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.7.insert, i32 %41, 0, 2, 8 + %.fca.0.2.9.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, 
<2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.8.insert, i64 %43, 0, 2, 9 + %.fca.1.0.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.0.2.9.insert, float 0.000000e+00, 1, 0 + %.fca.1.1.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.1.0.insert, i32 0, 1, 1 + %.fca.1.2.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.1.1.insert, i32 0, 1, 2 + %.fca.1.3.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.1.2.insert, i32 0, 1, 3 + %.fca.1.4.insert = insertvalue { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.1.3.insert, i32 0, 1, 4 + call void (...) @lgc.cps.jump(i32 %.sroa.370.8.vec.extract, i32 -1, {} poison, i32 %.sroa.0112.0.extract.trunc, i32 %51, { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %.fca.1.4.insert, <2 x float> zeroinitializer) + unreachable +} + +declare void @lgc.cps.jump(...) 
local_unnamed_addr +declare ptr addrspace(7) @lgc.load.buffer.desc(i64 %0, i32 %1, i32 %2, i32 %3) local_unnamed_addr +declare ptr @llvm.invariant.start.p7(i64 immarg %0, ptr addrspace(7) nocapture %1) + +!continuation.preservedPayloadRegisterCount = !{!7} +!lgc.cps.module = !{} + +!0 = !{i32 7} +!1 = !{!"function", { { float, i32, i32, i32, i32 }, <2 x float>, i32 } poison, !2} +!2 = !{i32 5, { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison} +!3 = !{i32 6} +!4 = !{i32 16} +!5 = !{i32 0, %struct.AnyHitTraversalData poison} +!6 = !{!"function", i1 poison, !5, float poison, i32 poison} +!7 = !{i32 8} +!8 = !{i32 0} +!9 = !{i32 0, %struct.DispatchSystemData poison} +!10 = !{!"function", i32 poison, !9} diff --git a/llvmraytracing/test/lgccps/multiple-await.ll b/llvmraytracing/test/lgccps/multiple-await.ll index ed1d23c7c0..0539843a52 100644 --- a/llvmraytracing/test/lgccps/multiple-await.ll +++ b/llvmraytracing/test/lgccps/multiple-await.ll @@ -24,18 +24,18 @@ declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 12) -; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 12) +; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CHECK-NEXT: store float [[ARG2]], ptr addrspace(32) [[ARG2_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[T0]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[T0]]) ; CHECK-NEXT: unreachable ; ; @@ -48,7 +48,7 @@ declare void @lgc.cps.jump(...) ; CHECK-NEXT: [[T2:%.*]] = fmul float [[TMP3]], [[ARG_RELOAD]] ; CHECK-NEXT: [[CR2:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee2) ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[CR2]] to ptr -; CHECK-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.1) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.1) ; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR2]], i32 2, {} poison, i32 [[TMP6]], float [[T2]]) ; CHECK-NEXT: unreachable ; diff --git a/llvmraytracing/test/lgccps/simple-await-more-state.ll b/llvmraytracing/test/lgccps/simple-await-more-state.ll index 16d85e2a25..61f3280346 100644 --- a/llvmraytracing/test/lgccps/simple-await-more-state.ll +++ b/llvmraytracing/test/lgccps/simple-await-more-state.ll @@ -21,18 +21,18 @@ declare void @lgc.cps.jump(...) 
; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]], float [[ARG2:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 12) -; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 12) +; CHECK-NEXT: [[ARG2_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 ; CHECK-NEXT: store float [[ARG2]], ptr addrspace(32) [[ARG2_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[T0]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[T0]]) ; CHECK-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/lgccps/simple-await.ll b/llvmraytracing/test/lgccps/simple-await.ll index 2356a3b3ae..d4e3f5da9c 100644 --- a/llvmraytracing/test/lgccps/simple-await.ll +++ b/llvmraytracing/test/lgccps/simple-await.ll @@ -20,16 +20,16 @@ declare void @lgc.cps.jump(...) ; CHECK-LABEL: define void @test( ; CHECK-SAME: {} [[STATE:%.*]], i32 [[RCR:%.*]], float [[ARG:%.*]]) !lgc.cps [[META0:![0-9]+]] !continuation [[META1:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store float [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RCR_SPILL_ADDR:%.*]] = getelementptr inbounds [[TEST_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store i32 [[RCR]], ptr addrspace(32) [[RCR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[T0:%.*]] = fadd float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @callee) -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[CR]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @test.resume.0) -; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP2]], float [[T0]]) +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[CR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @test.resume.0) +; CHECK-NEXT: call void (...) @lgc.cps.jump(i32 [[CR]], i32 2, {} poison, i32 [[TMP1]], float [[T0]]) ; CHECK-NEXT: unreachable ; ; diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index 105be4f11f..872d96444e 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -129,7 +129,7 @@ void *VKAPI_CALL IPipelineDumper::BeginPipelineDump(const PipelineDumpOptions *d assert(pipelineInfo.pGraphicsInfo); UnlinkedShaderStage unlinkedStage = UnlinkedStageCount; if (pipelineInfo.pGraphicsInfo->unlinked) { - if (pipelineInfo.pGraphicsInfo->fs.pModuleData) + if (PipelineDumper::isValidShaderInfo(pipelineInfo.pGraphicsInfo->fs)) unlinkedStage = UnlinkedStageFragment; else unlinkedStage = UnlinkedStageVertexProcess; @@ -170,6 +170,28 @@ void VKAPI_CALL IPipelineDumper::DumpPipelineBinary(void *dumpFile, GfxIpVersion PipelineDumper::DumpPipelineBinary(reinterpret_cast(dumpFile), gfxIp, pipelineBin); } +// ===================================================================================================================== +/// Dump graphics stage library file name. 
+/// +/// @param [in] pDumpFile The handle of pipeline dump file +/// @param [in] libFileNames File name array of size three +void VKAPI_CALL IPipelineDumper::DumpGraphicsLibraryFileName(void *dumpFile, const char **libFileNames) { + if (!dumpFile) + return; + + PipelineDumpFile *pipelineDumper = reinterpret_cast(dumpFile); + pipelineDumper->dumpFile << "\n[GraphicsLibrary]\n"; + static const char *libTypeName[] = {"preRaster", "fragment", "colorExport"}; + static const char *pipelineExt = ".pipe"; + for (unsigned i = 0; i < 3; i++) { + std::string tmpStr(libFileNames[i]); + if (!tmpStr.empty()) { + pipelineDumper->dumpFile << libTypeName[i] << "=" << tmpStr << pipelineExt << "\n"; + } + } + pipelineDumper->dumpFile << "\n"; +} + // ===================================================================================================================== // Dump extra info to pipeline file. // @@ -353,6 +375,14 @@ std::string PipelineDumper::getSpirvBinaryFileName(const MetroHash::Hash *hash) return std::string(fileName); } +// ===================================================================================================================== +// Checks whether the pipeline shader info contains valid shader info. +// +// @param info : Pipeline shader info +bool PipelineDumper::isValidShaderInfo(const PipelineShaderInfo &info) { + return info.pModuleData != nullptr || info.options.clientHash.lower != 0 || info.options.clientHash.upper != 0; +} + // ===================================================================================================================== // Gets the file name of pipeline info file according to the specified pipeline build info and pipeline hash. 
// @@ -369,31 +399,31 @@ std::string PipelineDumper::getPipelineInfoFileName(PipelineBuildInfo pipelineIn assert(pipelineInfo.pGraphicsInfo); const char *fileNamePrefix = nullptr; if (pipelineInfo.pGraphicsInfo->unlinked) { - if (pipelineInfo.pGraphicsInfo->task.pModuleData) + if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->task)) fileNamePrefix = "PipelineLibTask"; - else if (pipelineInfo.pGraphicsInfo->vs.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->vs)) fileNamePrefix = "PipelineLibVs"; - else if (pipelineInfo.pGraphicsInfo->tcs.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->tcs)) fileNamePrefix = "PipelineLibTcs"; - else if (pipelineInfo.pGraphicsInfo->tes.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->tes)) fileNamePrefix = "PipelineLibTes"; - else if (pipelineInfo.pGraphicsInfo->gs.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->gs)) fileNamePrefix = "PipelineLibGs"; - else if (pipelineInfo.pGraphicsInfo->mesh.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->mesh)) fileNamePrefix = "PipelineLibMesh"; - else if (pipelineInfo.pGraphicsInfo->fs.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->fs)) fileNamePrefix = "PipelineLibFs"; else fileNamePrefix = "PipelineLibCes"; - } else if (pipelineInfo.pGraphicsInfo->tes.pModuleData && pipelineInfo.pGraphicsInfo->gs.pModuleData) + } else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->tes) && isValidShaderInfo(pipelineInfo.pGraphicsInfo->gs)) fileNamePrefix = "PipelineGsTess"; - else if (pipelineInfo.pGraphicsInfo->gs.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->gs)) fileNamePrefix = "PipelineGs"; - else if (pipelineInfo.pGraphicsInfo->tes.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->tes)) fileNamePrefix = "PipelineTess"; - else if (pipelineInfo.pGraphicsInfo->task.pModuleData && pipelineInfo.pGraphicsInfo->mesh.pModuleData) + else 
if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->task) && isValidShaderInfo(pipelineInfo.pGraphicsInfo->mesh)) fileNamePrefix = "PipelineTaskMesh"; - else if (pipelineInfo.pGraphicsInfo->mesh.pModuleData) + else if (isValidShaderInfo(pipelineInfo.pGraphicsInfo->mesh)) fileNamePrefix = "PipelineMesh"; else fileNamePrefix = "PipelineVsFs"; @@ -591,14 +621,16 @@ void PipelineDumper::dumpResourceMappingNode(const ResourceMappingNode *userData // @param shaderInfo : Shader info of specified shader stage // @param [out] dumpFile : Dump file void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo, std::ostream &dumpFile) { - const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); - auto moduleHash = reinterpret_cast(&moduleData->hash[0]); - // Output shader binary file ShaderStage stage = shaderInfo->entryStage; + if (shaderInfo->pModuleData != nullptr) { + // Output shader binary file + const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); + auto moduleHash = reinterpret_cast(&moduleData->hash[0]); - dumpFile << "[" << getShaderStageAbbreviation(stage) << "SpvFile]\n"; - dumpFile << "fileName = " << getSpirvBinaryFileName(moduleHash) << "\n\n"; + dumpFile << "[" << getShaderStageAbbreviation(stage) << "SpvFile]\n"; + dumpFile << "fileName = " << getSpirvBinaryFileName(moduleHash) << "\n\n"; + } dumpFile << "[" << getShaderStageAbbreviation(stage) << "Info]\n"; // Output entry point @@ -762,6 +794,7 @@ void PipelineDumper::DumpPipelineBinary(PipelineDumpFile *dumpFile, GfxIpVersion ElfReader reader(gfxIp); size_t codeSize = pipelineBin->codeSize; + auto result = reader.ReadFromBuffer(pipelineBin->pCode, &codeSize); assert(result == Result::Success); (void(result)); // unused @@ -794,6 +827,27 @@ void PipelineDumper::DumpPipelineExtraInfo(PipelineDumpFile *dumpFile, const std dumpFile->dumpFile << *str; } +// 
===================================================================================================================== +// Dump fragment outputs info to pipeline file. +// +// @param dumpFile : Dump file +// @param data : fragment output buffer +// @param size : buffer size +void PipelineDumper::DumpFragmentOutputs(PipelineDumpFile *dumpFile, const uint8_t *data, uint32_t size) { + if (!dumpFile) + return; + + assert(size % 4 == 0); + const uint32_t *intData = reinterpret_cast(data); + + dumpFile->dumpFile << "\n[FsOutput]\n"; + dumpFile->dumpFile << "data="; + for (unsigned idx = 0; idx < size / 4; idx++) { + dumpFile->dumpFile << intData[idx] << ", "; + } + dumpFile->dumpFile << "\n\n"; +} + // ===================================================================================================================== // Dumps LLPC version info to file // @@ -1026,6 +1080,8 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "xfbOutInfo[" << idx << "].streamId = " << pXfbOutInfos[idx].streamId << "\n"; } dumpFile << "vbAddressLowBitsKnown = " << pipelineInfo->getGlState().vbAddressLowBitsKnown << "\n"; + dumpFile << "advancedBlendInfo.enableAdvancedBlend = " << pipelineInfo->advancedBlendInfo.enableAdvancedBlend << "\n"; + dumpFile << "advancedBlendInfo.binding = " << pipelineInfo->advancedBlendInfo.binding << "\n"; dumpPipelineOptions(&pipelineInfo->options, dumpFile); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 @@ -1098,7 +1154,8 @@ void PipelineDumper::dumpGraphicsPipelineInfo(std::ostream *dumpFile, const char // clang-format on for (unsigned stage = 0; stage < ShaderStageGfxCount; ++stage) { const PipelineShaderInfo *shaderInfo = shaderInfos[stage]; - if (!shaderInfo->pModuleData) + if ((shaderInfo->pModuleData == nullptr) && (shaderInfo->options.clientHash.lower == 0) && + (shaderInfo->options.clientHash.upper == 0)) continue; dumpPipelineShaderInfo(shaderInfo, *dumpFile); } @@ -1494,6 +1551,9 @@ MetroHash::Hash 
PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPi } } + hasher.Update(pipeline->advancedBlendInfo.enableAdvancedBlend); + hasher.Update(pipeline->advancedBlendInfo.binding); + MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -1654,10 +1714,7 @@ void PipelineDumper::updateHashForNonFragmentState(const GraphicsPipelineBuildIn hasher->Update(pipeline->rsState.provokingVertexMode); } - if (pipeline->gs.pModuleData || pipeline->tcs.pModuleData || pipeline->tes.pModuleData || - pipeline->gs.options.clientHash.lower != 0 || pipeline->gs.options.clientHash.upper != 0 || - pipeline->tcs.options.clientHash.lower != 0 || pipeline->tcs.options.clientHash.upper != 0 || - pipeline->tes.options.clientHash.lower != 0 || pipeline->tes.options.clientHash.upper != 0) + if (isValidShaderInfo(pipeline->gs) || isValidShaderInfo(pipeline->tcs) || isValidShaderInfo(pipeline->tes)) hasher->Update(iaState->patchControlPoints); hasher->Update(iaState->disableVertexReuse); hasher->Update(iaState->switchWinding); @@ -1823,8 +1880,7 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options // @param [in/out] hasher : Hasher to generate hash code void PipelineDumper::updateHashForPipelineShaderInfo(ShaderStage stage, const PipelineShaderInfo *shaderInfo, bool isCacheHash, MetroHash64 *hasher) { - if (shaderInfo->pModuleData || (shaderInfo->options.clientHash.lower != 0) || - (shaderInfo->options.clientHash.upper != 0)) { + if (isValidShaderInfo(*shaderInfo)) { hasher->Update(stage); if ((shaderInfo->options.clientHash.lower != 0) || (shaderInfo->options.clientHash.upper != 0)) { hasher->Update(shaderInfo->options.clientHash); diff --git a/tool/dumper/vkgcPipelineDumper.h b/tool/dumper/vkgcPipelineDumper.h index 97b49ff2f7..aebe21832d 100644 --- a/tool/dumper/vkgcPipelineDumper.h +++ b/tool/dumper/vkgcPipelineDumper.h @@ -66,6 +66,8 @@ class PipelineDumper { static void DumpPipelineExtraInfo(PipelineDumpFile *binaryFile, const std::string *str); 
+ static void DumpFragmentOutputs(PipelineDumpFile *dumpFile, const uint8_t *data, uint32_t size); + static MetroHash::Hash generateHashForGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline, bool isCacheHash, UnlinkedShaderStage unlinkedShaderType = UnlinkedStageCount); @@ -111,6 +113,8 @@ class PipelineDumper { // Returns the hash for the glue shader that corresponds to the given glue shader string. static const MetroHash::Hash generateHashForGlueShader(BinaryData glueShaderString); + static bool isValidShaderInfo(const PipelineShaderInfo &info); + private: static std::string getSpirvBinaryFileName(const MetroHash::Hash *hash); diff --git a/tool/vfx/vfx.h b/tool/vfx/vfx.h index 85e99a92af..ed01df3c51 100644 --- a/tool/vfx/vfx.h +++ b/tool/vfx/vfx.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include #define VFX_VERSION 0x10000 #define VFX_REVISION 1 @@ -500,6 +502,15 @@ struct ColorBuffer { unsigned blendSrcAlphaToColor; // Whether source alpha is blended to color channels for this target at draw time }; +// ===================================================================================================================== +// Represents the graphics library type. 
+enum GraphicsLibraryType : uint32_t { + GraphicsLibraryPreRaster, + GraphicsLibraryFragment, + GraphicsLibraryColorExport, + GraphicsLibraryCount +}; + }; // namespace Vfx #if VFX_SUPPORT_VK_PIPELINE @@ -509,6 +520,7 @@ enum VfxPipelineType : unsigned { VfxPipelineTypeGraphics = 0, VfxPipelineTypeCompute, VfxPipelineTypeRayTracing, + VfxPipelineTypeGraphicsLibrary }; // ===================================================================================================================== @@ -521,6 +533,8 @@ struct VfxPipelineState { Vkgc::RayTracingPipelineBuildInfo rayPipelineInfo; // Vkgc ray tracing pipeline build info unsigned numStages; // Number of shader source sections Vfx::ShaderSource *stages; // Shader source sections + std::string graphicsLibFileName[Vfx::GraphicsLibraryCount]; + std::vector fsOutputs; }; typedef struct VfxPipelineState *VfxPipelineStatePtr; diff --git a/tool/vfx/vfxParser.cpp b/tool/vfx/vfxParser.cpp index a98b2efe32..a661e1d7de 100644 --- a/tool/vfx/vfxParser.cpp +++ b/tool/vfx/vfxParser.cpp @@ -636,6 +636,9 @@ Section *Document::createSection(const char *sectionName) { case SectionTypeShader: section = new SectionShader(it->second); break; + case SectionTypFsOutput: + section = new SectionFsOutput(); + break; default: VFX_NEVER_CALLED(); break; diff --git a/tool/vfx/vfxPipelineDoc.cpp b/tool/vfx/vfxPipelineDoc.cpp index 9f6cd272c0..1039d7782f 100644 --- a/tool/vfx/vfxPipelineDoc.cpp +++ b/tool/vfx/vfxPipelineDoc.cpp @@ -67,12 +67,17 @@ unsigned PipelineDocument::getMaxSectionCount(SectionType type) { case SectionTypeResourceMapping: maxSectionCount = 1; break; + case SectionTypeGraphicsLibrary: + maxSectionCount = 1; case SectionTypeShader: maxSectionCount = UINT32_MAX; break; case SectionTypeShaderInfo: maxSectionCount = UINT32_MAX; break; + case SectionTypFsOutput: + maxSectionCount = 1; + break; default: break; } @@ -100,6 +105,16 @@ VfxPipelineStatePtr PipelineDocument::getDocument() { // Section "Version" m_pipelineState.version 
= Version; + // Section "GraphicsLibrary" + if (m_sections[SectionTypeGraphicsLibrary].size() > 0) { + m_pipelineState.pipelineType = VfxPipelineTypeGraphicsLibrary; + reinterpret_cast(m_sections[SectionTypeGraphicsLibrary][0]) + ->getSubState(m_fileName, m_pipelineState.graphicsLibFileName); + + // If a pipeline contains this section, we will compile these libraries separately. + return &m_pipelineState; + } + // Section "GraphicsPipelineState" if (m_sections[SectionTypeGraphicsState].size() > 0) { m_pipelineState.pipelineType = VfxPipelineTypeGraphics; @@ -232,6 +247,11 @@ VfxPipelineStatePtr PipelineDocument::getDocument() { DeduplicateResourceMappingData(resourceMapping); } + if (m_sections[SectionTypFsOutput].size() > 0) { + auto section = reinterpret_cast(m_sections[SectionTypFsOutput][0]); + section->getSubState(m_pipelineState.fsOutputs); + } + return &m_pipelineState; } @@ -258,11 +278,6 @@ bool PipelineDocument::validate() { } } - if (stageMask == 0) { - PARSE_ERROR(m_errorMsg, 0, "No Shader source section in pipeline!\n"); - return false; - } - const unsigned graphicsStageMask = ShaderStageBit::ShaderStageAllGraphicsBit; const unsigned computeStageMask = ShaderStageBit::ShaderStageComputeBit; const unsigned rayTracingStageMask = ShaderStageAllRayTracingBit; @@ -353,6 +368,9 @@ Section *PipelineDocument::createSection(const char *sectionName) { case SectionTypeResourceMapping: section = new SectionResourceMapping(); break; + case SectionTypeGraphicsLibrary: + section = new SectionGraphicsLibrary(); + break; default: section = Document::createSection(sectionName); break; @@ -393,6 +411,7 @@ bool PipelineDocument::getPtrOfSubSection(Section *section, unsigned lineNum, co CASE_SUBSECTION(MemberTypeGpurtFuncTable, SectionGpurtFuncTable) #endif CASE_SUBSECTION(MemberTypeExtendedRobustness, SectionExtendedRobustness) + CASE_SUBSECTION(MemberTypeAdvancedBlendInfo, SectionAdvancedBlendInfo) default: result = Document::getPtrOfSubSection(section, lineNum, 
memberName, memberType, isWriteAccess, arrayIndex, ptrOut, errorMsg); diff --git a/tool/vfx/vfxPipelineDoc.h b/tool/vfx/vfxPipelineDoc.h index 6e04d98b44..462bf4482c 100644 --- a/tool/vfx/vfxPipelineDoc.h +++ b/tool/vfx/vfxPipelineDoc.h @@ -72,7 +72,6 @@ class PipelineDocument : public Document { void DeduplicateResourceMappingData(Vkgc::ResourceMappingData *resourceMapping); VfxPipelineState m_pipelineState; // Contains the render state - Vkgc::TessellationLevel m_tessellationLevel; VkPipelineVertexInputStateCreateInfo m_vertexInputState; std::vector m_shaderSources; std::vector m_shaderInfos; diff --git a/tool/vfx/vfxSection.cpp b/tool/vfx/vfxSection.cpp index d4d9e6bc1d..9669a6864d 100644 --- a/tool/vfx/vfxSection.cpp +++ b/tool/vfx/vfxSection.cpp @@ -239,6 +239,7 @@ void Section::initSectionInfo() { INIT_SECTION_INFO("Version", SectionTypeVersion, 0) INIT_SECTION_INFO("CompileLog", SectionTypeCompileLog, 0) + INIT_SECTION_INFO("FsOutput", SectionTypFsOutput, 0) } // ===================================================================================================================== @@ -736,4 +737,16 @@ void SectionShader::getSubState(SectionShader::SubState &state) { state.stage = m_shaderStage; } +void SectionGraphicsLibrary::getSubState(const std::string &docFileName, SubState &state) { + // Prepend directory from "docFilename" to the given filename. 
+ std::string path; + auto separatorIndex = docFileName.find_last_of("/\\"); + if (separatorIndex != std::string::npos) + path = docFileName.substr(0, separatorIndex + 1); + + state[GraphicsLibraryPreRaster] = path + m_preRaster; + state[GraphicsLibraryFragment] = path + m_fragment; + state[GraphicsLibraryColorExport] = path + m_colorExport; +} + } // namespace Vfx diff --git a/tool/vfx/vfxSection.h b/tool/vfx/vfxSection.h index 89abb90f3f..62c09d6e8b 100644 --- a/tool/vfx/vfxSection.h +++ b/tool/vfx/vfxSection.h @@ -67,6 +67,8 @@ enum SectionType : unsigned { SectionTypeResourceMapping, // Resource mapping section SectionTypeUniformConstantMapEntry, // UniformConstantMapEntry section SectionTypeUniformConstantMap, // UniformConstantMap section + SectionTypeGraphicsLibrary, // Graphics library section + SectionTypFsOutput, // Fragment output section // GL pipeline SectionTypeGlProgramParameter, // GL program parameter section SectionTypeGlGraphicsState, // GL graphic pipeline state section @@ -128,6 +130,7 @@ enum MemberType : unsigned { MemberTypeIndirectCalleeSavedRegs, // VFX member type: SectionIndirectCalleeSavedRegs MemberTypeGpurtFuncTable, // VFX member type: SectionGpurtFuncTable MemberTypeExtendedRobustness, // VFX member type: SectionExtendedRobustness + MemberTypeAdvancedBlendInfo, // VFX member type: SectionAdvancedBlendInfo MemberTypeGlAttribLocation, // GL vertex attribute location MemberTypeGlShaderInfo, // GL SPIRV parameters MemberTypeGlVertexAttrib, // GL vertex input attribute @@ -929,4 +932,55 @@ class SectionSpecInfo : public Section { std::vector m_vkMapEntries; // Vulkan specialization map entry }; +// ===================================================================================================================== +// Represents the sub section GraphicsLibrary +class SectionGraphicsLibrary : public Section { +public: + typedef std::string SubState[Vfx::GraphicsLibraryCount]; + + SectionGraphicsLibrary() : Section(getAddrTable(), 
SectionTypeGraphicsLibrary, "GraphicsLibrary") {} + + void getSubState(const std::string &docFileName, SubState &state); + +private: + static StrToMemberAddrArrayRef getAddrTable() { + static std::vector addrTable = []() { + std::vector addrTableInitializer; + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsLibrary, m_preRaster, MemberTypeString, false); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsLibrary, m_fragment, MemberTypeString, false); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsLibrary, m_colorExport, MemberTypeString, false); + return addrTableInitializer; + }(); + return {addrTable.data(), addrTable.size()}; + } + + std::string m_preRaster; // Pre-Raster library file name + std::string m_fragment; // Fragment library file name + std::string m_colorExport; // Color export library file name +}; + +// ===================================================================================================================== +// Represents the sub section Metadata +class SectionFsOutput : public Section { +public: + typedef std::vector SubState; + + SectionFsOutput() : Section(getAddrTable(), SectionTypFsOutput, nullptr) { m_data = &m_bufMem; } + + void getSubState(SubState &state) { state = *m_data; } + +private: + static StrToMemberAddrArrayRef getAddrTable() { + static std::vector addrTable = []() { + std::vector addrTableInitializer; + INIT_MEMBER_NAME_TO_ADDR(SectionFsOutput, m_data, MemberTypeUArray, false); + return addrTableInitializer; + }(); + return {addrTable.data(), addrTable.size()}; + } + + SubState *m_data; // Fragment output + std::vector m_bufMem; // Buffer +}; + } // namespace Vfx diff --git a/tool/vfx/vfxVkSection.cpp b/tool/vfx/vfxVkSection.cpp index 595b98e3bb..a27f2c8f89 100644 --- a/tool/vfx/vfxVkSection.cpp +++ b/tool/vfx/vfxVkSection.cpp @@ -39,6 +39,7 @@ class VkSectionParserInit { INIT_SECTION_INFO("missInfo", SectionTypeShaderInfo, ShaderStage::ShaderStageRayTracingMiss) INIT_SECTION_INFO("callInfo", SectionTypeShaderInfo, 
ShaderStage::ShaderStageRayTracingCallable) INIT_SECTION_INFO("ResourceMapping", SectionTypeResourceMapping, 0) + INIT_SECTION_INFO("GraphicsLibrary", SectionTypeGraphicsLibrary, 0) }; void initEnumMap() { diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index bb7ad97833..e06eded576 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -797,6 +797,33 @@ class SectionXfbOutInfo : public Section { SubState m_state; }; +// ===================================================================================================================== +// Represents the sub section AdvancedBlendInfo +class SectionAdvancedBlendInfo : public Section { +public: + typedef Vkgc::AdvancedBlendInfo SubState; + + SectionAdvancedBlendInfo() : Section(getAddrTable(), SectionTypeUnset, "advancedBlendInfo") { + memset(&m_state, 0, sizeof(m_state)); + } + + void getSubState(SubState &state) { state = m_state; }; + SubState &getSubStateRef() { return m_state; }; + +private: + static StrToMemberAddrArrayRef getAddrTable() { + static std::vector addrTable = []() { + std::vector addrTableInitializer; + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionAdvancedBlendInfo, enableAdvancedBlend, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionAdvancedBlendInfo, binding, MemberTypeInt, false); + return addrTableInitializer; + }(); + return {addrTable.data(), addrTable.size()}; + } + + SubState m_state; +}; + // ===================================================================================================================== // Represents the section graphics state class SectionGraphicsState : public Section { @@ -882,6 +909,7 @@ class SectionGraphicsState : public Section { false); #endif INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionGraphicsState, m_xfbOutInfo, MemberTypeXfbOutInfo, true); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_advancedBlendInfo, MemberTypeAdvancedBlendInfo, true); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; 
@@ -896,6 +924,7 @@ class SectionGraphicsState : public Section { m_state.cbState.target[i].channelWriteMask = colorBuffer.channelWriteMask; m_state.cbState.target[i].format = colorBuffer.format; } + m_advancedBlendInfo.getSubState(m_state.advancedBlendInfo); m_options.getSubState(m_state.options); m_nggState.getSubState(m_state.nggState); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 71 @@ -972,6 +1001,7 @@ class SectionGraphicsState : public Section { std::vector m_xfbOutInfo; std::vector m_xfbOutInfoData; unsigned m_usrClipPlaneMask; + SectionAdvancedBlendInfo m_advancedBlendInfo; }; // ===================================================================================================================== diff --git a/util/vkgcElfReader.cpp b/util/vkgcElfReader.cpp index f948731361..9473436be1 100644 --- a/util/vkgcElfReader.cpp +++ b/util/vkgcElfReader.cpp @@ -195,6 +195,25 @@ template void ElfReader::getSymbol(unsigned idx, ElfSymbol *sym symbol->info.all = symbols[idx].st_info.all; } +// ===================================================================================================================== +// Gets index of the symbol in the symbol table section according to the specified name. +// +// @param symbolName : Symbol name +template uint32_t ElfReader::getSymbolIndexByName(const char *symbolName) const { + auto §ion = m_sections[m_symSecIdx]; + const char *strTab = reinterpret_cast(m_sections[m_strtabSecIdx]->data); + + auto symbols = reinterpret_cast(section->data); + unsigned symCount = getSymbolCount(); + for (unsigned idx = 0; idx < symCount; ++idx) { + auto name = strTab + symbols[idx].st_name; + if (strcmp(name, symbolName) == 0) { + return idx; + } + } + return InvalidValue; +} + // ===================================================================================================================== // Gets the count of relocations in the relocation section. 
template <class Elf> unsigned ElfReader<Elf>::getRelocationCount() const { diff --git a/util/vkgcElfReader.h b/util/vkgcElfReader.h index 2686ec915e..3a0bccad1e 100644 --- a/util/vkgcElfReader.h +++ b/util/vkgcElfReader.h @@ -460,6 +460,7 @@ template <class Elf> class ElfReader { uint32_t getSymbolCount() const; void getSymbol(uint32_t idx, ElfSymbol *symbol) const; + uint32_t getSymbolIndexByName(const char *symbolName) const; bool isValidSymbol(const char *symbolName); diff --git a/version/CMakeLists.txt b/version/CMakeLists.txt index 0342598380..53a57b6881 100644 --- a/version/CMakeLists.txt +++ b/version/CMakeLists.txt @@ -76,31 +76,45 @@ if (NOT DISABLE_LLPC_VERSION_USES_LLVM) endif() ### Cached Config-related Options ###################################################################################### +#if VKI_BUILD_NAVI12 option(LLPC_BUILD_NAVI12 "LLPC support for NAVI12?" ON) if (LLPC_BUILD_NAVI12) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_NAVI12 CHIP_HDR_NAVI12) endif() +#endif #if VKI_BUILD_NAVI32 option(LLPC_BUILD_NAVI32 "LLPC support for NAVI32?" ON) if (LLPC_BUILD_NAVI32) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_NAVI32 CHIP_HDR_NAVI32) endif() #endif +#if VKI_BUILD_REMBRANDT option(LLPC_BUILD_REMBRANDT "LLPC support for REMBRANDT?" ON) if (LLPC_BUILD_REMBRANDT) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_REMBRANDT CHIP_HDR_REMBRANDT) endif() +#endif +#if VKI_BUILD_RAPHAEL option(LLPC_BUILD_RAPHAEL "LLPC support for RAPHAEL?" ON) if (LLPC_BUILD_RAPHAEL) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_RAPHAEL CHIP_HDR_RAPHAEL) endif() +#endif +#if VKI_BUILD_MENDOCINO option(LLPC_BUILD_MENDOCINO "LLPC support for MENDOCINO?" ON) if (LLPC_BUILD_MENDOCINO) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_MENDOCINO CHIP_HDR_MENDOCINO) endif() +#endif #if VKI_BUILD_PHOENIX1 option(LLPC_BUILD_PHOENIX1 "LLPC support for PHOENIX1?"
ON) if (LLPC_BUILD_PHOENIX1) target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_PHOENIX1 CHIP_HDR_PHOENIX1) endif() #endif +#if VKI_BUILD_PHOENIX2 +option(LLPC_BUILD_PHOENIX2 "LLPC support for PHOENIX2?" OFF) +if (LLPC_BUILD_PHOENIX2) + target_compile_definitions(llpc_version INTERFACE LLPC_BUILD_PHOENIX2 CHIP_HDR_PHOENIX2) +endif() +#endif diff --git a/version/include/llpc/GpurtVersion.h b/version/include/llpc/GpurtVersion.h new file mode 100644 index 0000000000..5f6ea34051 --- /dev/null +++ b/version/include/llpc/GpurtVersion.h @@ -0,0 +1,106 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GpurtVersion.h + * @brief Declare helpers used to pass GpuRt version info into LLPC. + *********************************************************************************************************************** + */ + +#ifdef LLPC_VERSION_GPURT_VERSION_H + +#ifndef __cplusplus +// Ensure this is included just once from HLSL, as we depend on HLSL defines for versioning flags +// that may be lost if an earlier include is made without defines. +#error "GpurtVersion.h may only be included once from HLSL" +#endif + +#else + +#define LLPC_VERSION_GPURT_VERSION_H + +#ifdef __cplusplus +#include <cstdint> +#endif + +// Helpers to pass versioning info from GpuRt to LLPC to stage changes during promotions. +// +// Sometimes there are changes that need to be done simultaneously in GpuRt and LLPC. +// +// It is not possible to stage such changes *reliably* using numeric versions. One could attempt +// to implement changes in A, guarded by a future version of B that is larger than the current one, +// and then do the change in B and bump the version. However, this has the problem that a different +// change in B in the meantime may bump the version, unintentionally enabling the change in A. +// +// A common pattern to stage such changes is add support for both in component A that is disabled by default, +// promote that, and then do the change in component B, simultaneously also somehow enabling the change in component A. +// +// Because this header is included into GpuRt, it allows to apply the staging scheme above with A=GpuRt, guarded +// by ifdefs on a define controlled from this header here, and then do the change in B=LLPC, setting the define.
+// +// The following mechanism allows to do it in reverse, which sometimes is easier if the change on the compiler +// side is small. The idea is to pass a numerical constant from GpuRt to the compiler. The bits of this constant +// indicate whether specific changes are active, although in practice multiple active bits might be rare. +// The constant is encoded as length of an array of a helper type, which is the return type of _cont_GpurtVersionFlags. +// This way, we don't depend on compiler optimizations for the constant to be indeed a constant in IR. +// (As opposed to returning the constant from a function, and inspecting the function body in the compiler.) +// +// On the GpuRt side, we just need to set a define before including this header. This will then +// set the corresponding flag in the constant which is then included into the compiled module. +// If LLPC has already been updated to no longer depend on the flag, the define is ignored and +// can be removed on the GpuRt side. +// +// The process to stage changes using the mechanism below is: +// * Implement the change in LLPC, guarded by a newly added flag GpuRtVersionFlag::SomeChange. +// Include it into GpuRtVersionFlagsContainer, guarded by a new define SOME_CHANGE that is not set. +// * Implement the change in GpuRt, setting the define SOME_CHANGE before including the LLPC header. +// This changes GpuRtVersionFlagsContainer and LLPC will see the SomeChange flag as enabled. +// * Remove the flag SomeChange in LLPC, and change the guarded code assuming it to be enabled. +// * Remove the define SOME_CHANGE in GpuRt. +// +// Every value of this enum corresponds to a change controlled from GpuRt. Ensure the values use disjoint bits. +enum class GpuRtVersionFlag : uint32_t {}; + +#ifndef __cplusplus +// HLSL-only code to export a function _cont_GpurtVersionFlags, whose return type encodes enabled version flags. 
+ +// Usage: For every flag, bit-or the flag into the length of dummy, guarded by a define for that flag. +struct GpuRtVersionFlagsContainer { + int dummy[0 + // Example: + // | (uint32_t)GpuRtVersionFlag::EnableSomeFeature + ]; +}; + +// This function is never called. It is exported by GpuRt, and LLPC inspects +// its return type to retrieve versioning flags. +export GpuRtVersionFlagsContainer _cont_GpurtVersionFlags() { + GpuRtVersionFlagsContainer result; + return result; +} + +#endif + +#endif diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in index 916b1090b3..8a8db1f01b 100644 --- a/version/include/llpcVersion.h.in +++ b/version/include/llpcVersion.h.in @@ -37,6 +37,12 @@ // %Version History // | %Version | Change Description | // | -------- | ----------------------------------------------------------------------------------------------------- | +// | 72.0 | Enable std430 layout rule 9 to the OpenGL default uniform block | +// | 71.4 | Add PixelOpInternalBinding to InternalBinding. Add GlCompatibilityDrawPixelsType. Add enableBitmap to | +// | | glState. Add enableBitmapLsb to glState. Add enableTwoSideLighting to glState. Add drawPixelsType to | +// | | glState. Add pixelTransferScale to glState. Add pixelTransferBias to glState. | +// | 71.3 | Add IPipelineDumper::DumpGraphicsLibraryFileName. | +// | 71.2 | Add AdvancedBlendInfo to GraphicsPipelineBuildInfo | // | 71.1 | Add GraphNodeName to the GraphicsPipelineBuildInfo | // | 71.0 | Add glState to GraphicsPipelineBuildInfo. And move OGL status to glState. | // | 70.5 | Add vbAddressLowBitsKnown to Options. Add vbAddrLowBits to VertexInputDescription. | @@ -172,10 +178,10 @@ #pragma once /// LLPC major interface version. -#define LLPC_INTERFACE_MAJOR_VERSION 70 +#define LLPC_INTERFACE_MAJOR_VERSION 72 /// LLPC minor interface version. 
-#define LLPC_INTERFACE_MINOR_VERSION 1 +#define LLPC_INTERFACE_MINOR_VERSION 3 /// The client's LLPC major interface version #ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION