Skip to content

Commit

Permalink
Update llpc from commit d6557077
Browse files Browse the repository at this point in the history
[Continuations] Add helper commonMaterializable()
[Continuations] Add one extra SROA
[Continuations] Add PostProcessing test.
[Continuations] Check that resume functions report no stack usage
[Continuations] Cleanup includes.
[Continuations] Fix corruption seen in RaytracingReflections
[Continuations] Fix first payload argument dword calculation
[Continuations] Fix outgoing register count for kernelentry
[Continuations] Get DispatchSystemData type from GetLocalRootIndex.
[Continuations] Handle gpurt GetSetting
[Continuations] Ignore life time markers when doing store-to-load forwarding
[Continuations] Keep _AmdAcceptHitAttributes calls
[Continuations] Lower _GetResumePoint correctly
[Continuations] Move stackptr lowering
[Continuations] Promote _AmdAwait arguments correctly
[Continuations] Refactor RT shader stage enum
[Continuations] Rematerialize single source shuffle
[Continuations] Remove old unused helper
[Continuations] Remove use of [[no_unique_address]]
[Continuations] Set up proper register count for Traversal
[Continuations] Spill in PayloadCopyHelper
[Continuations] Support TriangleVertexPositions in continuations
[Continuations] Update test to reduce diff
[Continuations] Use int as name for gpurt settings
[Continuations] Use llvmraytracing in more places
[RT] Set the entry name as modulename
Add 'override' to SPIRVBaseVariable::validate()
Add AmdExtD3DShaderIntrinsics_ShaderMarker
Add DbgInfo api to ModuleBunch
Add detecting more patterns which can be lowered into fmul_legacy and fma_legacy
Add dynamicVgprBlockSize option
Add GpurtVersionFlags
Add Phoenix2 support
Add ShaderModuleUsage::usePrimitiveId
Apply WQM to result of WWM subgroup operations
Clean GEP formation for NGG atomic add
Cleanup the old PAL metadata code path
CrossModuleInliner: add check for consistent target module
CrossModuleInliner: make struct layout independent of NDEBUG
DataLayout index sizes for SPIRV address spaces
Detect read-only buffers and mark them as invariant
Expand GEP compatibility in tests
Handle new return type for some IRBuilder methods
Handle new return type from DIBuilder
lgc: add strided buffer load
lgc: correct stage detection for subgroup builder
lgc: missing code for load.strided.buffer.desc
lgc: Add LoadBufferAddrOp
lgc: Allow hoisting common instructions
lgc: Disassemble archive-of-ELFs
lgc: move CreateDebugBreak to BuilderCommon
lgc: Set flat workgroup size for cs_chain
lgc: Unbreak dxcp build
LowerRayTracingPipeline: Avoid redundant GEPs
Move replaceAllPointerUses into CompilerUtils
Pack mesh shader outputs in LDS space
Reduce SW XFB LDS usage
Refine advanced blend func with value-type arguments
Refine dump info for shader module id
Refine pipeline dump when shader module identifier is used
Remove checks for gfxip >= 10
Remove LDS granularity rounding
Remove ShaderModuleUsage::useGenericBuiltIn
Remove the accidentally-added file for *.pipe.swp
Report new RequireFullPipeline error
Return frozen poison value in _AmdGetUninitialized
Return the state whether the texture is accessed by texelFetch function
Set MaxOutgoingVgprCount into SGPR and pass it
Simplify LDS GEP formation
Some renaming in mesh shader
Support uint64 AmdTraceRaySampleGpuTimer()
TargetFeatures for amdgpu_cs_chain_preserve funcs
Update llvm-dialects submodule
Use 32-bit GEPs consistently
Use CmpInst::Create instead of new to create instruction
Use convertUsersOfConstantsToInstruction throughout
VertexFetch: vertex offset is added twice in vertex offset mode
Fix check_llpc test regression
Fix color export offline compilation
Fix compiler warnings
Fix error on unused private field
Fix ShaderDbg issues
Fix the check for reserving a dummy vertex attribute
Fix unusual case imported by new game:STARFIELD
Fix updating threshold issue when merge with pipeline cache which cause PAL assert
  • Loading branch information
qiaojbao committed May 14, 2024
1 parent 14535c0 commit 266215c
Show file tree
Hide file tree
Showing 284 changed files with 20,281 additions and 96,141 deletions.
3 changes: 3 additions & 0 deletions .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,8 @@ USCALED = "USCALED"
Datas = "Datas"
HSA = "HSA"
VALU = "VALU"
dne = "dne"
offen = "offen"
varing = "varing"
Derivate = "Derivate"
Fo = "Fo"
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ if(ICD_BUILD_LLPC)
add_subdirectory(util ${PROJECT_BINARY_DIR}/util)
endif()

add_subdirectory(gfxruntime ${PROJECT_BINARY_DIR}/gfxruntime)

### VKGC build LLPC ################################################################
if(ICD_BUILD_LLPC)
include("cmake/compilerutils.cmake")
Expand Down Expand Up @@ -171,6 +173,10 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set_property(TARGET vfx PROPERTY FOLDER Compiler)
endif()

if (TARGET gfxruntime)
set_property(TARGET gfxruntime PROPERTY FOLDER Compiler)
endif()

if(ICD_BUILD_LLPC)
set_property(TARGET llpc PROPERTY FOLDER Compiler)
set_property(TARGET llpcinternal PROPERTY FOLDER Compiler)
Expand Down Expand Up @@ -212,8 +218,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
endif()
set_property(TARGET check-amdllpc PROPERTY FOLDER "LLPC Tests")
set_property(TARGET check-amdllpc-units PROPERTY FOLDER "LLPC Tests")
set_property(TARGET check-continuations PROPERTY FOLDER "Continuations Tests")
set_property(TARGET check-continuations-units PROPERTY FOLDER "Continuations Tests")
set_property(TARGET check-llvmraytracing PROPERTY FOLDER "LLVMRaytracing Tests")
set_property(TARGET check-llvmraytracing-units PROPERTY FOLDER "LLVMRaytracing Tests")
set_property(TARGET check-lgc-units PROPERTY FOLDER "LGC Tests")
endif()
endif()
Expand Down
1 change: 1 addition & 0 deletions cmake/CompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ function(set_compiler_options PROJECT_NAME ENABLE_WERROR)
target_compile_options("${PROJECT_NAME}" PRIVATE
# Output with color if in terminal: https://github.com/ninja-build/ninja/wiki/FAQ
-fcolor-diagnostics
-Werror=unused-private-field
-Wno-covered-switch-default
-Wno-extra-semi
-Wno-gnu-anonymous-struct
Expand Down
31 changes: 31 additions & 0 deletions compilerutils/include/compilerutils/CompilerUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newTy
llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newType,
llvm::ArrayRef<llvm::AttributeSet> argAttrs, llvm::Module *targetModule = nullptr);

// Add an unreachable at the current position and remove the rest of the basic block.
void createUnreachable(llvm::IRBuilder<> &b);

struct CrossModuleInlinerResult {
llvm::Value *returnValue;
llvm::iterator_range<llvm::Function::iterator> newBBs;
Expand Down Expand Up @@ -104,9 +107,37 @@ class CrossModuleInliner {
llvm::GlobalValue *findCopiedGlobal(llvm::GlobalValue &sourceGv, llvm::Module &targetModule);

private:
// Remembers the first target module this inliner is used with and asserts that
// every later call is made with that same module.
void checkTargetModule(llvm::Module &targetModule) {
  if (lastUsedTargetModule != nullptr) {
    // A target module was already recorded; it must be the one passed now.
    assert(lastUsedTargetModule == &targetModule);
    return;
  }
  // First use: record the module for later consistency checks.
  lastUsedTargetModule = &targetModule;
}

llvm::SmallDenseMap<llvm::GlobalValue *, llvm::GlobalValue *> mappedGlobals;
llvm::Module *lastUsedTargetModule = nullptr; // used to check that we don't use different target modules
};

// Essentially RAUW for pointers for the case that these use different address
// spaces, rewriting all derived pointers to also use the new address space.
// Writes instructions which are redundant after the replacement into
// the given ToBeRemoved vector.
// The caller has to handle the erasure afterwards.
void replaceAllPointerUses(llvm::IRBuilder<> *builder, llvm::Value *oldPointerValue, llvm::Value *newPointerValue,
llvm::SmallVectorImpl<llvm::Instruction *> &toBeRemoved);

} // namespace CompilerUtils

namespace llvm {

// Replacement for PointerType::getWithSamePointeeType that also builds against
// LLVM versions where that method is no longer available.
//
// Returns a pointer type in address space `addressSpace` that corresponds to
// `ptrTy`. When LLVM_MAIN_REVISION is known and older than 482880 the call is
// forwarded to PointerType::getWithSamePointeeType, so a typed pointer stays
// typed. For newer (or unknown) revisions the result is obtained from
// PointerType::get(context, addressSpace) using the context of `ptrTy`.
//
// TODO: Remove this as soon as all internal users of opaque pointers have been
// fixed.
PointerType *getWithSamePointeeType(PointerType *ptrTy, unsigned addressSpace);

} // namespace llvm

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ template <typename KeyT, typename ValueT, bool TrackReverse> class LoweringPoint
using ReverseMap = std::conditional_t<TrackReverse, llvm::DenseMap<ValueT, llvm::SmallVector<uintptr_t>>, Empty>;

/// If requested, track the locations in which each value is mentioned.
[[no_unique_address]] ReverseMap m_reverseMap;
ReverseMap m_reverseMap;

/// Map keys to values.
///
Expand Down
140 changes: 140 additions & 0 deletions compilerutils/lib/CompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

Expand Down Expand Up @@ -135,6 +136,20 @@ Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType,
return cloneFunctionHeader(f, newType, attributes, targetModule);
}

// Emits an `unreachable` at the builder's insertion point and discards every
// instruction that followed it in the same basic block.
void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) {
  auto *terminator = b.CreateUnreachable();
  auto firstDead = ++terminator->getIterator();
  auto *block = terminator->getParent();

  if (firstDead != block->end()) {
    // Move the trailing instructions into a temporary block so they can be
    // deleted as a unit; successor phis are redirected to that temporary block
    // before it is removed.
    auto *deadTail = BasicBlock::Create(b.getContext(), "", block->getParent());
    deadTail->splice(deadTail->end(), block, firstDead, block->end());
    deadTail->replaceSuccessorsPhiUsesWith(block, deadTail);
    DeleteDeadBlock(deadTail);
  }
}

namespace {

// Get the name of a global that is copied to a different module for inlining.
Expand Down Expand Up @@ -220,6 +235,7 @@ class CrossModuleValueMaterializer : public ValueMaterializer {
iterator_range<Function::iterator> CompilerUtils::CrossModuleInliner::inlineCall(CallBase &cb) {
auto *calleeFunc = cb.getCalledFunction();
assert(calleeFunc && "Cannot find called function");
checkTargetModule(*cb.getFunction()->getParent());
LLVM_DEBUG(dbgs() << "Inlining '" << calleeFunc->getName() << "' across modules\n");

Function *targetFunc = cb.getFunction();
Expand Down Expand Up @@ -346,6 +362,7 @@ CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *ca
GlobalValue *CompilerUtils::CrossModuleInliner::findCopiedGlobal(GlobalValue &sourceGv, Module &targetModule) {
assert(sourceGv.getParent() != &targetModule && "This function only finds copies across modules");
assert(sourceGv.hasName() && "Cannot find a global value that does not have a name");
checkTargetModule(targetModule);

if (auto found = mappedGlobals.find(&sourceGv); found != mappedGlobals.end()) {
assert(found->second->getParent() == &targetModule &&
Expand All @@ -358,3 +375,126 @@ GlobalValue *CompilerUtils::CrossModuleInliner::findCopiedGlobal(GlobalValue &so
assert(gv->getValueType() == sourceGv.getValueType());
return gv;
}

// Returns the pointer type corresponding to `ptrTy` in address space
// `addressSpace`, using whichever LLVM API is available for the LLVM revision
// being built against.
PointerType *llvm::getWithSamePointeeType(PointerType *ptrTy, unsigned addressSpace) {
#if !LLVM_MAIN_REVISION || LLVM_MAIN_REVISION >= 482880
  // Current LLVM, or an unknown revision which is treated as the latest:
  // build the pointer type from the context and address space alone.
  return PointerType::get(ptrTy->getContext(), addressSpace);
#else
  // Older LLVM that still provides the original helper.
  return PointerType::getWithSamePointeeType(ptrTy, addressSpace);
#endif
}

// Replaces every use of oldPointerValue with newPointerValue, where the two
// pointer types differ only in their address space, and walks the transitive
// users to mutate the types of derived pointers to the new address space.
//
// builder         : Not referenced by the body of this function.
// oldPointerValue : Pointer whose uses are replaced; its type is mutated in place.
// newPointerValue : Replacement pointer living in the new address space.
// toBeRemoved     : Receives addrspacecast instructions that became redundant.
//                   They are left in the IR; the caller has to erase them.
//
// Lifetime marker calls on the pointer are erased immediately. Any user
// instruction that is not handled in the switch below hits llvm_unreachable.
void CompilerUtils::replaceAllPointerUses(IRBuilder<> *builder, Value *oldPointerValue, Value *newPointerValue,
SmallVectorImpl<Instruction *> &toBeRemoved) {
// Note: The implementation explicitly supports typed pointers, which
// complicates some of the code below.

// Assert that both types are pointers that only differ in the address space.
PointerType *oldPtrTy = cast<PointerType>(oldPointerValue->getType());
// oldPtrTy is only read inside asserts; the void cast avoids an
// unused-variable warning when asserts are compiled out.
(void)oldPtrTy;
PointerType *newPtrTy = cast<PointerType>(newPointerValue->getType());
unsigned newAS = newPtrTy->getAddressSpace();
assert(newAS != oldPtrTy->getAddressSpace());
assert(getWithSamePointeeType(oldPtrTy, newAS) == newPtrTy);

// Give the old value the new type first so that the replaceAllUsesWith below
// is done between values of identical type.
oldPointerValue->mutateType(newPtrTy);

// Traverse through the users and set up the address space.
// The uses are collected before the replacement because the old value has no
// uses left afterwards.
SmallVector<Use *> worklist(make_pointer_range(oldPointerValue->uses()));
oldPointerValue->replaceAllUsesWith(newPointerValue);

// Given a pointer type, get a pointer with the same pointee type (possibly
// opaque) as the given type that uses the newAS address space.
auto getMutatedPtrTy = [newAS](Type *ty) {
PointerType *ptrTy = cast<PointerType>(ty);
// Support typed pointers:
return getWithSamePointeeType(ptrTy, newAS);
};

while (!worklist.empty()) {
Use *ptrUse = worklist.pop_back_val();
// The value flowing through this use (the already-rewritten pointer or a
// value derived from it) and the instruction consuming it.
Value *ptr = cast<Value>(ptrUse);
Instruction *inst = cast<Instruction>(ptrUse->getUser());
LLVM_DEBUG(dbgs() << "Visiting " << *inst << '\n');
// In the switch below, "break" means to continue with replacing
// the users of the current value, while "continue" means to stop at
// the current value, and proceed with next one from the work list.
// usesRange is a view on the uses of inst: it is appended to the work list
// either after the switch or, for addrspacecast, before the cast's uses are
// replaced.
auto usesRange = make_pointer_range(inst->uses());
switch (inst->getOpcode()) {
default:
LLVM_DEBUG(inst->dump());
llvm_unreachable("Unhandled instruction\n");
break;
case Instruction::Call: {
if (inst->isLifetimeStartOrEnd()) {
// The lifetime marker is not useful anymore.
// It is erased right away instead of being added to toBeRemoved.
inst->eraseFromParent();
} else {
LLVM_DEBUG(inst->dump());
llvm_unreachable("Unhandled call instruction\n");
}
// No further processing needed for the users.
continue;
}
case Instruction::Load:
case Instruction::Store:
// No further processing needed for the users.
continue;
case Instruction::InsertValue:
// For insertvalue, there could be 2 cases:
// Assume %ptr = ptrtoint ... to i32
// (1) %inserted = insertvalue [2 x i32] poison, i32 %ptr, 0
// (2) %0 = bitcast i32 %ptr to [2 x i16]
//     %inserted = insertvalue [2 x i16] %0, i16 1, 0
// For (1), the tracked value is only the inserted element and no further
// handling is needed; for (2), the tracked value is the aggregate operand,
// so we are modifying the pointer and need to track all users of %inserted.
if (cast<InsertValueInst>(inst)->getAggregateOperand() == ptr) {
break;
}
continue;
case Instruction::And:
case Instruction::Add:
case Instruction::PtrToInt:
// The result type is left unchanged; only the users are followed.
break;
case Instruction::BitCast: {
// This can happen with typed pointers
assert(cast<BitCastOperator>(inst)->getSrcTy()->isPointerTy() &&
cast<BitCastOperator>(inst)->getDestTy()->isPointerTy());
inst->mutateType(getMutatedPtrTy(inst->getType()));
break;
}
case Instruction::AddrSpaceCast:
// Check that the pointer operand has already been fixed
assert(inst->getOperand(0)->getType()->getPointerAddressSpace() == newAS);
// Push the correct users before RAUW.
worklist.append(usesRange.begin(), usesRange.end());
inst->mutateType(getMutatedPtrTy(inst->getType()));
// Since we are mutating the address spaces of users as well,
// we can just use the (already mutated) cast operand.
inst->replaceAllUsesWith(inst->getOperand(0));
// The cast is now unused; erasing it is left to the caller.
toBeRemoved.push_back(inst);
continue;
case Instruction::IntToPtr:
case Instruction::GetElementPtr: {
inst->mutateType(getMutatedPtrTy(inst->getType()));
break;
}
case Instruction::Select: {
auto *oldType = inst->getType();
// A select with a non-pointer result keeps its type and only has its users
// followed.
if (oldType->isPointerTy()) {
Type *newType = getMutatedPtrTy(oldType);
// No further processing if the type has the correct pointer type
if (newType == oldType)
continue;

inst->mutateType(newType);
}
break;
}
}

// Reached via "break": follow the users of the instruction just handled.
worklist.append(usesRange.begin(), usesRange.end());
}
}
87 changes: 87 additions & 0 deletions gfxruntime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
##
#######################################################################################################################
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
#######################################################################################################################

# Runtime library target; its sources are attached further below with target_sources().
add_library(gfxruntime)

# Python3 runs the shader compile script. No particular Python 3 version is
# needed and only the interpreter component is used. QUIET disables the
# informational messages; REQUIRED still stops configuration when no
# interpreter is found.
find_package(Python3 QUIET REQUIRED COMPONENTS Interpreter)

# Locate the DirectXShaderCompiler (dxc).
#  - Linux host: DXC_PATH is the dxc program found by find_program().
#  - Windows:    DXC_PATH is the bin directory of the pinned DirectXShaderCompiler
#                drop below the DK_ROOT environment variable (read at configure time).
# Other platforms leave DXC_PATH untouched.
if(CMAKE_HOST_SYSTEM_NAME MATCHES "Linux")
  find_program(DXC_PATH dxc)
  # if(NOT <var>) is true for the "<var>-NOTFOUND" result of find_program()
  # as well as for an empty value, so no sentinel string has to be spelled out.
  if(NOT DXC_PATH)
    message(FATAL_ERROR "Could not find shader compiler tool dxc.")
  endif()
elseif(WIN32)
  set(DXC_PATH "$ENV{DK_ROOT}/DirectXShaderCompiler/8c9d92b/bin")
  if(NOT EXISTS "${DXC_PATH}")
    message(FATAL_ERROR "Unable to find DirectXShaderCompiler directory: ${DXC_PATH}")
  endif()
endif()

# Point the "current" source/binary directories used by the rest of this file
# at the gfxruntime folder below the enclosing project's source and binary roots.
# NOTE(review): CMAKE_CURRENT_SOURCE_DIR and CMAKE_CURRENT_BINARY_DIR are normally
# maintained by CMake itself. Presumably this override exists so the file also
# works when it is pulled in from a different directory - confirm against all
# callers before changing or removing it.
set(CMAKE_CURRENT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/gfxruntime)
set(CMAKE_CURRENT_BINARY_DIR ${PROJECT_BINARY_DIR}/gfxruntime)

### Auto-generated advanced blend runtime library for graphics shaders #################################################
# Script that drives the shader compiler, the HLSL source it consumes, and the
# header that is expected in the binary directory afterwards.
set(GEN_ADVANCED_BLEND_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/tools/CompileAdvancedBlendShader.py")
set(ADVANCED_BLEND_INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/shaders/AdvancedBlend.hlsl")
set(ADVANCED_BLEND_OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/g_AdvancedBlendLibrary_spv.h")

# Build rule for the generated header. It is re-run whenever the HLSL input or
# the generator script changes.
add_custom_command(
  OUTPUT
    "${ADVANCED_BLEND_OUT_FILE}"
  DEPENDS
    "${ADVANCED_BLEND_INPUT_FILE}"
    "${GEN_ADVANCED_BLEND_SCRIPT}"
  COMMAND Python3::Interpreter "${GEN_ADVANCED_BLEND_SCRIPT}"
    --compilerPath "${DXC_PATH}"
    --shaderDir "${CMAKE_CURRENT_SOURCE_DIR}/src/shaders"
    --outputDir "${CMAKE_CURRENT_BINARY_DIR}"
  COMMENT "Generating advanced blend runtime library"
  # Without VERBATIM the escaping of the arguments above is platform-dependent,
  # which matters as soon as one of the paths contains spaces or special characters.
  VERBATIM
)

# Sources of the runtime library: the generated SPIR-V header (listing it here
# makes the target depend on the custom command that produces it) and the
# hand-written C++ source.
target_sources(gfxruntime
  PRIVATE
    "${ADVANCED_BLEND_OUT_FILE}"
    "${CMAKE_CURRENT_SOURCE_DIR}/src/GfxRuntimeLibrary.cpp"
)

# PUBLIC: consumers of gfxruntime get both the generated-header directory and
# the public include directory.
target_include_directories(gfxruntime
  PUBLIC
    "${CMAKE_CURRENT_BINARY_DIR}"
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
)

# Request position-independent code through the target property instead of a
# hand-written -fPIC. CMake then adds the right flag for the active toolchain
# and adds nothing where the flag does not apply (e.g. Clang targeting MSVC,
# which the previous "GNU|Clang" compiler-ID match also covered).
set_target_properties(gfxruntime PROPERTIES POSITION_INDEPENDENT_CODE ON)
Loading

0 comments on commit 266215c

Please sign in to comment.